/* 

D.Sun
2014/3/17
Combine the new NSC pull in 2014 with all previous NSC pulls.
Keep useful variables
searchdate = search begin date

*2014 Feb search : we want to use the NSC sample containing non-graduates;
as Chris pointed out, this lotto sample NSC should NOT be a subset of MA 2003-13 grad NSC file (because of the nongrads).
 
 
	The lists of files here are:
	
		1. everyone : "nsc_charter_grp1_6" search submitted in 2010
		2. MA 2010 grads: "2010_graduates" search submitted in 2011
		3. MA 2003-2010 grads: "Grad_Files 2003_2010" search in 2012
		4. nongrad (2.5K): "NSC update 3 28 2013", search in 2012
		5. MA 2003-2012 grads: "NSC_MAgrads_2003_2012 file`j' 5 8 2013" in which `j' = {A,B,C}, search submitted in 2013.
		6. MA lotto sample2014: "NSC_all_lotteryplicants.dta", search submitted in 2014
		7. MA 2003-2013 grads: "NSC_MAgrads_2003_2013 file`j' 3 17 2014" in which `j' = {A,B,C}, search submitted in 2014.
		8. MA 2003-2012 non-grads "EVER ENR _SY2003 GR8_SP NON CURRENT & NON MA PUBLIC SCH GRADS", search in 2014 
 *Sarah Cohodes 9/9/2014 Pull in 2013 file for non-LTO purposes, pull in "ever enrolled" file
 *Sarah Cohodes 5/2/2015 Pull in new file with 2007-2014 GRADUATES
 *Sarah Cohodes 12/21/2017 Pull in new file with 2003+ all students
 * Sarah Cohodes 1/22/2019 pull in new files with 2016-2017 grads (non grads in 2016)
 * Sarah Cohodes 8/5/2019 pull in new files with 2018 grads and nongrads (Helen code as references)
 * Sarah Cohodes 7/15/2020 pull in new files with 2019 grads  (includes grads from 2012-2019 (no nongrads) Search in Spring 2020 )
  * Sarah Cohodes 2/14/2022 pull in new files with 2020 grads  (includes grads from 2013-2020 (no nongrads) Search in Fall 2021 ) -- note, this reflects a "revised" SPSS file that corrected an error according to DESE
 * Astrid Pineda 06/29/2022 pull in new files with 2021 grads (includes grads from 2014-2021)
 * Astrid Pineda 09/01/2022 pull in new files with 2021 nongrads (includes nongrads from 2013-2021)
 * Astrid Pineda 08/02/2023 pull in new files with 2022 grads (includes grads from 2020-2022)
 * Astrid Pineda 07/10/2024 pull in new files with 2023 grads (includes grads from 2016-2023)
 * Sarah Cohodes streamline final clean 1/7/2025
*/


* Session setup. `set memory` was dropped: it is obsolete and ignored by every
* Stata release that provides `import spss`, which this file relies on.
clear
set more off
set checksum off

* $raw must be defined by the caller before this do-file runs. Without it the
* two path globals below would silently point at "/NSC/" and "/saves/".
if "$raw" == "" {
	display as error "global raw is not set; define it before running this do-file"
	exit 198
}

* Both globals end in "/" so file names can be appended directly.
global nscdata "$raw/NSC/"
global save "$raw/saves/"
   *Purpose of the program: tidy the college names a bit so that duplicates are easier to drop in later steps.

*************
*SWITCHES* 
*************

* Stage toggles: set to 1 to run the matching "if `name' == 1 { ... }" section,
* anything else to skip it.

* --- SPSS -> Stata conversion of the raw deliveries
local convert         = 1

* --- per-file cleaning: graduate files
local file2008grad    = 1    // reads nsc_2003_2007_grad
local file2009grad    = 1    // reads nsc_2006_2008_grad
local file2012grad    = 1    // reads nsc_2007_2011_grad
local file2015grad    = 1    // reads nsc_2007_2014_grad
local file2016grad    = 1
local file2017grad    = 1
local file2019grad    = 1
local file2020grad    = 1
local file2021grad    = 1
local file2022grad    = 1
local file2023grad    = 1
local file2024grad    = 1

* --- per-file cleaning: non-graduate files
local file2014nongrad = 1    // reads nsc_2003_2013_nongrad
local file2015nongrad = 1    // reads nsc_2007_2014_nongrad
local file2018nongrad = 1
local file2019nongrad = 1
local file2022nongrad = 1

* --- later stages
local combine         = 1
local sent            = 1
local clean           = 1

*************
*convert all files from SPSS to Stata
*************
if `convert' == 1 {

	* Convert each raw NSC SPSS (.sav) delivery in ${nscdata} to a Stata .dta.
	* ${nscdata} already ends in "/", so file names are appended directly; the
	* former "\" separator only resolves on Windows and did not match the
	* "${nscdata}nsc_..." form used by the cleaning sections.

	import spss using "${nscdata}NSC 2003_to_2007 Grads.sav", clear
	save "${nscdata}nsc_2003_2007_grad.dta", replace

	import spss using "${nscdata}NSC 2007_to_2011 Grads.sav", clear
	save "${nscdata}nsc_2007_2011_grad.dta", replace

	import spss using "${nscdata}NSC Non-Grads 2007_to_2014.sav", clear
	save "${nscdata}nsc_2007_2014_nongrad.dta", replace

	import spss using "${nscdata}NSC 2007_to_2014 Grads.sav", clear
	save "${nscdata}nsc_2007_2014_grad.dta", replace

	import spss using "${nscdata}NSC 2008_to_2015 Grads.sav", clear
	save "${nscdata}nsc_2008_2015_grad.dta", replace

	import spss using "${nscdata}NSC 2006_to_2008 Grads.sav", clear
	save "${nscdata}nsc_2006_2008_grad.dta", replace

	import spss using "${nscdata}NSC 2006_to_2016 Grads.sav", clear
	save "${nscdata}nsc_2006_2016_grad.dta", replace

	import spss using "${nscdata}NSC 2011_to_2018 Grads.sav", clear
	save "${nscdata}nsc_2011_2018_grad.dta", replace

	import spss using "${nscdata}NSC Non-Grads 2011_to_2018.sav", clear
	save "${nscdata}nsc_2011_2018_nongrad.dta", replace

	import spss using "${nscdata}NSC Non-Grads 2003_to_2013.sav", clear
	save "${nscdata}nsc_2003_2013_nongrad.dta", replace

	import spss using "${nscdata}NSC 2012_to_2019 Grads.sav", clear
	save "${nscdata}nsc_2012_2019_grad.dta", replace

	import spss using "${nscdata}NSC 2014_to_2021 Grads.sav", clear
	save "${nscdata}nsc_2014_2021_grad.dta", replace

	* NOTE(review): nsc_2013_2021_nongrad.dta is written twice, here and again
	* further down from "NSC 2013_to_2021 Non-Grads.sav". The later save wins.
	* Confirm which source file is authoritative and drop the other import.
	import spss using "${nscdata}NSC Non-Grads 2013_to_2021.sav", clear
	save "${nscdata}nsc_2013_2021_nongrad.dta", replace

	*import spss using "${nscdata}NSC 2015_to_2022 Grads.sav", clear  // error reading file -- we have it via Stat/Transfer
	*save "${nscdata}nsc_2015_2022_grad.dta", replace

	import spss using "${nscdata}NSC Non-Grads 2009_to_2018.sav", clear
	save "${nscdata}nsc_2009_2018_nongrad.dta", replace

	* 2013-2020 grads: use the REVISED delivery only. The un-revised
	* "NSC 2013_to_2020 Grads.sav" used to be imported afterwards and saved to
	* the same .dta, overwriting the corrected data with the original file.
	import spss using "${nscdata}NSC 2013_to_2020 Grads - REVISED.sav", clear
	save "${nscdata}nsc_2013_2020_grad.dta", replace
	*import spss using "${nscdata}NSC 2013_to_2020 Grads.sav", clear  // superseded by the REVISED file above
	*save "${nscdata}nsc_2013_2020_grad.dta", replace

	import spss using "${nscdata}NSC 2013_to_2021 Non-Grads.sav", clear
	save "${nscdata}nsc_2013_2021_nongrad.dta", replace

	import spss using "${nscdata}NSC 2016_to_2023.sav", clear
	save "${nscdata}nsc_2016_2023_grad.dta", replace

}

*************
*2003-2007 graduates file
*************
if `file2008grad' == 1 {

	* Clean the 2003-2007 MA graduates NSC file and save it to ${save}.
	use "${nscdata}nsc_2003_2007_grad", clear

	rename *, lower

	* sasid is a string that may contain underscores; strip them so it can be
	* stored as a number.
	replace sasid = subinstr(sasid, "_", "", .)
	destring sasid, replace
	format sasid %12.0g
	order sasid, before(firstname)

	drop namesuffix

	* The first four characters of searchdate give the search begin year,
	* which is also used as grad_year.
	tostring searchdate, replace
	gen searchbeginyear = substr(searchdate, 1, 4)
	destring searchbeginyear, replace
	gen grad_year = searchbeginyear
	drop searchdate

	rename _v1 __2year4year
	destring __2year4year, replace

	duplicates drop

	* Store every CIP / major field as a string; capture lets the loop continue
	* if one conversion fails.
	foreach v of varlist *cip* *major* {
		cap tostring `v', replace force
	}

	* Replace the string flags with labelled numeric versions under the same
	* name (encode names the value label after the temporary <var>_1).
	foreach j in recordfound publicprivate graduated {
		encode `j', gen(`j'_1)
		drop `j'
		ren `j'_1 `j'
	}

	* Parse the date fields with the "YMD" mask into Stata daily dates.
	foreach i in enrollmentbegin enrollmentend graduationdate {
		tostring `i', replace
		gen t`i' = date(`i', "YMD")
		format t`i' %td
		drop `i'
		ren t`i' `i'
	}

	gen indata = "2003_2007graduates"
	gen searchsubmit = 2014

	* degreecip: degreecip1 when present, otherwise a CIP code guessed from
	* the text of degreemajor1.
	*   - Rules run top to bottom and every rule tests degreecip1 (not
	*     degreecip), so when several patterns match the LAST one wins.
	*     Do not reorder.
	*   - Patterns are unanchored substring matches on the upper-cased major.
	*   - The upper-cased major and the "no CIP" flag are computed once in
	*     helper variables, which are dropped before saving.
	*   - Removed rules that could never change the result: "EDUCATION"
	*     (overridden by "EDUC"), the first "LIBRA", the first
	*     "HUMAN RESO|HOSPITALITY|HOTEL", and the misspelled
	*     "ENGLISH LIERATURE" (any such value already matches "ENGLISH").
	g degreecip = degreecip1
	gen __major_uc = upper(degreemajor1)
	gen byte __nocip = (degreecip1 == "")

	replace degreecip = "4506" if __nocip & regexm(__major_uc, "ECONOMICS")
	replace degreecip = "52"   if __nocip & regexm(__major_uc, "BUSINESS|FINANCE")
	replace degreecip = "52"   if __nocip & regexm(__major_uc, "OPERATIONS MANAG")
	replace degreecip = "5214" if __nocip & regexm(__major_uc, "MARKETING")
	replace degreecip = "5213" if __nocip & regexm(__major_uc, "MANAGEMENT")
	replace degreecip = "45"   if __nocip & regexm(__major_uc, "ANTHRO|ARCHEO|CRIMINO|DEMOGRA|POLITIC|SOCIOLO|HUMANI|GOVERN|GEOGRA")
	replace degreecip = "50"   if __nocip & regexm(__major_uc, "VISUAL|PERFORM|ART|DANCE|PHOTO|FILM|MUSIC|MEDIA|ENTERTAIN|DRAMA|CRAFT|THEATRE|THEATER")
	replace degreecip = "54"   if __nocip & regexm(__major_uc, "HISTORY")
	replace degreecip = "42"   if __nocip & regexm(__major_uc, "PSYCHOLOGY")
	replace degreecip = "38"   if __nocip & regexm(__major_uc, "PHILOSO")
	replace degreecip = "24"   if __nocip & regexm(__major_uc, "LIBERAL|GENERAL STUDIES")
	replace degreecip = "23"   if __nocip & regexm(__major_uc, "ENGLISH|WRITING")
	replace degreecip = "22"   if __nocip & regexm(__major_uc, "LEGAL|LAW")
	replace degreecip = "16"   if __nocip & regexm(__major_uc, "FOREIGN LANGUAGE|LANGUAGES|SPAN|JAPAN|CHIN|COMPARATIVE LING|FRENCH")
	replace degreecip = "09"   if __nocip & regexm(__major_uc, "COMMUNI")
	replace degreecip = "05"   if __nocip & regexm(__major_uc, "ETHNIC|CULTURAL|GENDER|WOMEN")
	replace degreecip = "14"   if __nocip & regexm(__major_uc, "ENGI")
	replace degreecip = "11"   if __nocip & regexm(__major_uc, "COMPUT|INFORMATION TECH")
	replace degreecip = "13"   if __nocip & regexm(__major_uc, "EDUC|CURRIC|EARLY CHI")
	replace degreecip = "13"   if __nocip & regexm(__major_uc, "SCHOOL COUN|STUDENT COUN")
	replace degreecip = "26"   if __nocip & regexm(__major_uc, "BIOLOG|NEURO|BIOTECH")
	replace degreecip = "27"   if __nocip & regexm(__major_uc, "MATH")
	replace degreecip = "25"   if __nocip & regexm(__major_uc, "LIBRA")
	replace degreecip = "38"   if __nocip & regexm(__major_uc, "RELIG")
	replace degreecip = "39"   if __nocip & regexm(__major_uc, "THEOLOG")
	replace degreecip = "40"   if __nocip & regexm(__major_uc, "ASTRON|ATMOS|GEOLOG|CHEMI|PHYSIC|METEO")
	replace degreecip = "44"   if __nocip & regexm(__major_uc, "PUBLIC ADMIN|PUBLIC POL|SOCIAL WORK|COMMUNITY ORG|SOCIAL SERVICE|HUMAN SERV")
	replace degreecip = "26"   if __nocip & regexm(__major_uc, "BIOCHEM|BIOPHY|PHYSIO|BOTANY|ANATOMY|ZOO|GENETIC|GENOM|PHARMA|TOXICO|ECOLOG|EPIDEM|WILDLIFE")
	replace degreecip = "51"   if __nocip & regexm(__major_uc, "VETERINARY|DENT|HEALTH SCI")
	replace degreecip = "31"   if __nocip & regexm(__major_uc, "SPORT MANAGEMENT|SPORTS MANAGEMENT|KINESIO|EXER")
	replace degreecip = "09"   if __nocip & regexm(__major_uc, "PUBLIC RELA|ADVERTIS|RADIO|TELEV|JOURNA")
	replace degreecip = "51"   if __nocip & regexm(__major_uc, "PUBLIC HEALTH|REHAB|PHYSICAL THER|PREMED|PRE-MED|PRE-VET|NUR|MEDIC|HEALTH CARE ADM")
	replace degreecip = "01"   if __nocip & regexm(__major_uc, "PLANT|SOIL|AGRI|TURF|HORTIC|FOOD SCI|ANIMAL")
	replace degreecip = "04"   if __nocip & regexm(__major_uc, "ARCHI")
	replace degreecip = "50"   if __nocip & regexm(__major_uc, "DESIGN")
	replace degreecip = "03"   if __nocip & regexm(__major_uc, "FOREST|ENVIRONMENTAL STUD|ENVIRONMENTAL SCI")
	replace degreecip = "52"   if __nocip & regexm(__major_uc, "HUMAN RESO|HOSPITALITY|HOTEL|FASHION MERCH")
	replace degreecip = "19"   if __nocip & regexm(__major_uc, "FOODS|NUTRITION")
	replace degreecip = "43"   if __nocip & regexm(__major_uc, "CRIMINAL JUS|CRIME & JUST")
	replace degreecip = "16"   if __nocip & regexm(__major_uc, "CLASSICS")
	replace degreecip = "5203" if __nocip & regexm(__major_uc, "ACCOUNTING")
	replace degreecip = "47"   if __nocip & regexm(__major_uc, "AUTOMOTIVE TECH|AUTOMOB")
	replace degreecip = "51"   if __nocip & regexm(__major_uc, "CLINICAL LAB")
	replace degreecip = "19"   if __nocip & regexm(__major_uc, "HUMAN DEV")

	drop __major_uc __nocip

	* First save: before the final duplicates pass.
	save "${save}nsc_MAgrads_2003_2007_clean_file.dta", replace

	* Second save: duplicates removed again after the conversions above.
	duplicates drop
	label data "MA grads 2003-07"
	save "${save}nsc_MA_2003_2007graduates_clean.dta", replace
}

*************
*2007-2011 graduates file
*************
if `file2012grad' == 1 {

	* Clean the 2007-2011 MA graduates NSC file and save it to ${save}.
	use "${nscdata}nsc_2007_2011_grad", clear

	rename *, lower

	* sasid is a string that may contain underscores; strip them so it can be
	* stored as a number.
	replace sasid = subinstr(sasid, "_", "", .)
	destring sasid, replace
	format sasid %12.0g
	order sasid, before(firstname)

	drop namesuffix

	* The first four characters of searchdate give the search begin year,
	* which is also used as grad_year.
	tostring searchdate, replace
	gen searchbeginyear = substr(searchdate, 1, 4)
	destring searchbeginyear, replace
	gen grad_year = searchbeginyear
	drop searchdate

	rename _v1 __2year4year
	destring __2year4year, replace

	duplicates drop

	* Store every CIP / major field as a string; capture lets the loop continue
	* if one conversion fails.
	foreach v of varlist *cip* *major* {
		cap tostring `v', replace force
	}

	* Replace the string flags with labelled numeric versions under the same
	* name (encode names the value label after the temporary <var>_1).
	foreach j in recordfound publicprivate graduated {
		encode `j', gen(`j'_1)
		drop `j'
		ren `j'_1 `j'
	}

	* Parse the date fields with the "YMD" mask into Stata daily dates.
	foreach i in enrollmentbegin enrollmentend graduationdate {
		tostring `i', replace
		gen t`i' = date(`i', "YMD")
		format t`i' %td
		drop `i'
		ren t`i' `i'
	}

	* degreecip: degreecip1 when present, otherwise a CIP code guessed from
	* the text of degreemajor1.
	*   - Rules run top to bottom and every rule tests degreecip1 (not
	*     degreecip), so when several patterns match the LAST one wins.
	*     Do not reorder.
	*   - Patterns are unanchored substring matches on the upper-cased major.
	*   - The upper-cased major and the "no CIP" flag are computed once in
	*     helper variables, which are dropped before saving.
	*   - Removed rules that could never change the result: "EDUCATION"
	*     (overridden by "EDUC"), the first "LIBRA", the first
	*     "HUMAN RESO|HOSPITALITY|HOTEL", and the misspelled
	*     "ENGLISH LIERATURE" (any such value already matches "ENGLISH").
	g degreecip = degreecip1
	gen __major_uc = upper(degreemajor1)
	gen byte __nocip = (degreecip1 == "")

	replace degreecip = "4506" if __nocip & regexm(__major_uc, "ECONOMICS")
	replace degreecip = "52"   if __nocip & regexm(__major_uc, "BUSINESS|FINANCE")
	replace degreecip = "52"   if __nocip & regexm(__major_uc, "OPERATIONS MANAG")
	replace degreecip = "5214" if __nocip & regexm(__major_uc, "MARKETING")
	replace degreecip = "5213" if __nocip & regexm(__major_uc, "MANAGEMENT")
	replace degreecip = "45"   if __nocip & regexm(__major_uc, "ANTHRO|ARCHEO|CRIMINO|DEMOGRA|POLITIC|SOCIOLO|HUMANI|GOVERN|GEOGRA")
	replace degreecip = "50"   if __nocip & regexm(__major_uc, "VISUAL|PERFORM|ART|DANCE|PHOTO|FILM|MUSIC|MEDIA|ENTERTAIN|DRAMA|CRAFT|THEATRE|THEATER")
	replace degreecip = "54"   if __nocip & regexm(__major_uc, "HISTORY")
	replace degreecip = "42"   if __nocip & regexm(__major_uc, "PSYCHOLOGY")
	replace degreecip = "38"   if __nocip & regexm(__major_uc, "PHILOSO")
	replace degreecip = "24"   if __nocip & regexm(__major_uc, "LIBERAL|GENERAL STUDIES")
	replace degreecip = "23"   if __nocip & regexm(__major_uc, "ENGLISH|WRITING")
	replace degreecip = "22"   if __nocip & regexm(__major_uc, "LEGAL|LAW")
	replace degreecip = "16"   if __nocip & regexm(__major_uc, "FOREIGN LANGUAGE|LANGUAGES|SPAN|JAPAN|CHIN|COMPARATIVE LING|FRENCH")
	replace degreecip = "09"   if __nocip & regexm(__major_uc, "COMMUNI")
	replace degreecip = "05"   if __nocip & regexm(__major_uc, "ETHNIC|CULTURAL|GENDER|WOMEN")
	replace degreecip = "14"   if __nocip & regexm(__major_uc, "ENGI")
	replace degreecip = "11"   if __nocip & regexm(__major_uc, "COMPUT|INFORMATION TECH")
	replace degreecip = "13"   if __nocip & regexm(__major_uc, "EDUC|CURRIC|EARLY CHI")
	replace degreecip = "13"   if __nocip & regexm(__major_uc, "SCHOOL COUN|STUDENT COUN")
	replace degreecip = "26"   if __nocip & regexm(__major_uc, "BIOLOG|NEURO|BIOTECH")
	replace degreecip = "27"   if __nocip & regexm(__major_uc, "MATH")
	replace degreecip = "25"   if __nocip & regexm(__major_uc, "LIBRA")
	replace degreecip = "38"   if __nocip & regexm(__major_uc, "RELIG")
	replace degreecip = "39"   if __nocip & regexm(__major_uc, "THEOLOG")
	replace degreecip = "40"   if __nocip & regexm(__major_uc, "ASTRON|ATMOS|GEOLOG|CHEMI|PHYSIC|METEO")
	replace degreecip = "44"   if __nocip & regexm(__major_uc, "PUBLIC ADMIN|PUBLIC POL|SOCIAL WORK|COMMUNITY ORG|SOCIAL SERVICE|HUMAN SERV")
	replace degreecip = "26"   if __nocip & regexm(__major_uc, "BIOCHEM|BIOPHY|PHYSIO|BOTANY|ANATOMY|ZOO|GENETIC|GENOM|PHARMA|TOXICO|ECOLOG|EPIDEM|WILDLIFE")
	replace degreecip = "51"   if __nocip & regexm(__major_uc, "VETERINARY|DENT|HEALTH SCI")
	replace degreecip = "31"   if __nocip & regexm(__major_uc, "SPORT MANAGEMENT|SPORTS MANAGEMENT|KINESIO|EXER")
	replace degreecip = "09"   if __nocip & regexm(__major_uc, "PUBLIC RELA|ADVERTIS|RADIO|TELEV|JOURNA")
	replace degreecip = "51"   if __nocip & regexm(__major_uc, "PUBLIC HEALTH|REHAB|PHYSICAL THER|PREMED|PRE-MED|PRE-VET|NUR|MEDIC|HEALTH CARE ADM")
	replace degreecip = "01"   if __nocip & regexm(__major_uc, "PLANT|SOIL|AGRI|TURF|HORTIC|FOOD SCI|ANIMAL")
	replace degreecip = "04"   if __nocip & regexm(__major_uc, "ARCHI")
	replace degreecip = "50"   if __nocip & regexm(__major_uc, "DESIGN")
	replace degreecip = "03"   if __nocip & regexm(__major_uc, "FOREST|ENVIRONMENTAL STUD|ENVIRONMENTAL SCI")
	replace degreecip = "52"   if __nocip & regexm(__major_uc, "HUMAN RESO|HOSPITALITY|HOTEL|FASHION MERCH")
	replace degreecip = "19"   if __nocip & regexm(__major_uc, "FOODS|NUTRITION")
	replace degreecip = "43"   if __nocip & regexm(__major_uc, "CRIMINAL JUS|CRIME & JUST")
	replace degreecip = "16"   if __nocip & regexm(__major_uc, "CLASSICS")
	replace degreecip = "5203" if __nocip & regexm(__major_uc, "ACCOUNTING")
	replace degreecip = "47"   if __nocip & regexm(__major_uc, "AUTOMOTIVE TECH|AUTOMOB")
	replace degreecip = "51"   if __nocip & regexm(__major_uc, "CLINICAL LAB")
	replace degreecip = "19"   if __nocip & regexm(__major_uc, "HUMAN DEV")

	drop __major_uc __nocip

	gen indata = "2007_2011graduates"
	gen searchsubmit = 2014

	* First save: before the final duplicates pass.
	save "${save}nsc_MAgrads_2007_2011_clean_file.dta", replace

	* Second save: duplicates removed again after the conversions above.
	duplicates drop
	label data "MA grads 2007-11"
	save "${save}nsc_MA_2007_2011graduates_clean.dta", replace
}

*************
*2007-2014 nongraduates file
*************
if `file2015nongrad' == 1 {

	* Clean the 2007-2014 MA non-graduates NSC file and save it to ${save}.
	use "${nscdata}nsc_2007_2014_nongrad", clear

	rename *, lower

	* sasid is a string that may contain underscores; strip them so it can be
	* stored as a number.
	replace sasid = subinstr(sasid, "_", "", .)
	destring sasid, replace
	format sasid %12.0g
	order sasid, before(firstname)

	drop namesuffix

	* The first four characters of searchdate give the search begin year,
	* which is also used as grad_year.
	tostring searchdate, replace
	gen searchbeginyear = substr(searchdate, 1, 4)
	destring searchbeginyear, replace
	gen grad_year = searchbeginyear
	drop searchdate

	rename _v1 __2year4year
	destring __2year4year, replace

	duplicates drop

	* Store every CIP / major field as a string; capture lets the loop continue
	* if one conversion fails.
	foreach v of varlist *cip* *major* {
		cap tostring `v', replace force
	}

	* Replace the string flags with labelled numeric versions under the same
	* name (encode names the value label after the temporary <var>_1).
	foreach j in recordfound publicprivate graduated {
		encode `j', gen(`j'_1)
		drop `j'
		ren `j'_1 `j'
	}

	* Parse the date fields with the "YMD" mask into Stata daily dates.
	foreach i in enrollmentbegin enrollmentend graduationdate {
		tostring `i', replace
		gen t`i' = date(`i', "YMD")
		format t`i' %td
		drop `i'
		ren t`i' `i'
	}

	* degreecip: degreecip1 when present, otherwise a CIP code guessed from
	* the text of degreemajor1.
	*   - Rules run top to bottom and every rule tests degreecip1 (not
	*     degreecip), so when several patterns match the LAST one wins.
	*     Do not reorder.
	*   - Patterns are unanchored substring matches on the upper-cased major.
	*   - The upper-cased major and the "no CIP" flag are computed once in
	*     helper variables, which are dropped before saving.
	*   - Removed rules that could never change the result: "EDUCATION"
	*     (overridden by "EDUC"), the first "LIBRA", the first
	*     "HUMAN RESO|HOSPITALITY|HOTEL", and the misspelled
	*     "ENGLISH LIERATURE" (any such value already matches "ENGLISH").
	g degreecip = degreecip1
	gen __major_uc = upper(degreemajor1)
	gen byte __nocip = (degreecip1 == "")

	replace degreecip = "4506" if __nocip & regexm(__major_uc, "ECONOMICS")
	replace degreecip = "52"   if __nocip & regexm(__major_uc, "BUSINESS|FINANCE")
	replace degreecip = "52"   if __nocip & regexm(__major_uc, "OPERATIONS MANAG")
	replace degreecip = "5214" if __nocip & regexm(__major_uc, "MARKETING")
	replace degreecip = "5213" if __nocip & regexm(__major_uc, "MANAGEMENT")
	replace degreecip = "45"   if __nocip & regexm(__major_uc, "ANTHRO|ARCHEO|CRIMINO|DEMOGRA|POLITIC|SOCIOLO|HUMANI|GOVERN|GEOGRA")
	replace degreecip = "50"   if __nocip & regexm(__major_uc, "VISUAL|PERFORM|ART|DANCE|PHOTO|FILM|MUSIC|MEDIA|ENTERTAIN|DRAMA|CRAFT|THEATRE|THEATER")
	replace degreecip = "54"   if __nocip & regexm(__major_uc, "HISTORY")
	replace degreecip = "42"   if __nocip & regexm(__major_uc, "PSYCHOLOGY")
	replace degreecip = "38"   if __nocip & regexm(__major_uc, "PHILOSO")
	replace degreecip = "24"   if __nocip & regexm(__major_uc, "LIBERAL|GENERAL STUDIES")
	replace degreecip = "23"   if __nocip & regexm(__major_uc, "ENGLISH|WRITING")
	replace degreecip = "22"   if __nocip & regexm(__major_uc, "LEGAL|LAW")
	replace degreecip = "16"   if __nocip & regexm(__major_uc, "FOREIGN LANGUAGE|LANGUAGES|SPAN|JAPAN|CHIN|COMPARATIVE LING|FRENCH")
	replace degreecip = "09"   if __nocip & regexm(__major_uc, "COMMUNI")
	replace degreecip = "05"   if __nocip & regexm(__major_uc, "ETHNIC|CULTURAL|GENDER|WOMEN")
	replace degreecip = "14"   if __nocip & regexm(__major_uc, "ENGI")
	replace degreecip = "11"   if __nocip & regexm(__major_uc, "COMPUT|INFORMATION TECH")
	replace degreecip = "13"   if __nocip & regexm(__major_uc, "EDUC|CURRIC|EARLY CHI")
	replace degreecip = "13"   if __nocip & regexm(__major_uc, "SCHOOL COUN|STUDENT COUN")
	replace degreecip = "26"   if __nocip & regexm(__major_uc, "BIOLOG|NEURO|BIOTECH")
	replace degreecip = "27"   if __nocip & regexm(__major_uc, "MATH")
	replace degreecip = "25"   if __nocip & regexm(__major_uc, "LIBRA")
	replace degreecip = "38"   if __nocip & regexm(__major_uc, "RELIG")
	replace degreecip = "39"   if __nocip & regexm(__major_uc, "THEOLOG")
	replace degreecip = "40"   if __nocip & regexm(__major_uc, "ASTRON|ATMOS|GEOLOG|CHEMI|PHYSIC|METEO")
	replace degreecip = "44"   if __nocip & regexm(__major_uc, "PUBLIC ADMIN|PUBLIC POL|SOCIAL WORK|COMMUNITY ORG|SOCIAL SERVICE|HUMAN SERV")
	replace degreecip = "26"   if __nocip & regexm(__major_uc, "BIOCHEM|BIOPHY|PHYSIO|BOTANY|ANATOMY|ZOO|GENETIC|GENOM|PHARMA|TOXICO|ECOLOG|EPIDEM|WILDLIFE")
	replace degreecip = "51"   if __nocip & regexm(__major_uc, "VETERINARY|DENT|HEALTH SCI")
	replace degreecip = "31"   if __nocip & regexm(__major_uc, "SPORT MANAGEMENT|SPORTS MANAGEMENT|KINESIO|EXER")
	replace degreecip = "09"   if __nocip & regexm(__major_uc, "PUBLIC RELA|ADVERTIS|RADIO|TELEV|JOURNA")
	replace degreecip = "51"   if __nocip & regexm(__major_uc, "PUBLIC HEALTH|REHAB|PHYSICAL THER|PREMED|PRE-MED|PRE-VET|NUR|MEDIC|HEALTH CARE ADM")
	replace degreecip = "01"   if __nocip & regexm(__major_uc, "PLANT|SOIL|AGRI|TURF|HORTIC|FOOD SCI|ANIMAL")
	replace degreecip = "04"   if __nocip & regexm(__major_uc, "ARCHI")
	replace degreecip = "50"   if __nocip & regexm(__major_uc, "DESIGN")
	replace degreecip = "03"   if __nocip & regexm(__major_uc, "FOREST|ENVIRONMENTAL STUD|ENVIRONMENTAL SCI")
	replace degreecip = "52"   if __nocip & regexm(__major_uc, "HUMAN RESO|HOSPITALITY|HOTEL|FASHION MERCH")
	replace degreecip = "19"   if __nocip & regexm(__major_uc, "FOODS|NUTRITION")
	replace degreecip = "43"   if __nocip & regexm(__major_uc, "CRIMINAL JUS|CRIME & JUST")
	replace degreecip = "16"   if __nocip & regexm(__major_uc, "CLASSICS")
	replace degreecip = "5203" if __nocip & regexm(__major_uc, "ACCOUNTING")
	replace degreecip = "47"   if __nocip & regexm(__major_uc, "AUTOMOTIVE TECH|AUTOMOB")
	replace degreecip = "51"   if __nocip & regexm(__major_uc, "CLINICAL LAB")
	replace degreecip = "19"   if __nocip & regexm(__major_uc, "HUMAN DEV")

	drop __major_uc __nocip

	gen indata = "2007_2014nongraduates"
	gen searchsubmit = 2015

	* First save: before the final duplicates pass.
	save "${save}nsc_MAnongrads_2007_2014_clean_file.dta", replace

	* Second save: duplicates removed again after the conversions above.
	duplicates drop
	label data "MA nongrads 2007-14"
	save "${save}nsc_MA_2007_2014nongraduates_clean.dta", replace
}

*************
*2006-2008 graduates file (search submitted 2017)
*************
if `file2009grad' == 1 {

	* Standardise the 2006-2008 graduates NSC file and write it to ${save}.
	use "${nscdata}nsc_2006_2008_grad", clear
	rename *, lower

	* Student id: remove underscores, convert to numeric, move before firstname.
	replace sasid = subinstr(sasid, "_", "", .)
	destring sasid, replace
	format sasid %12.0g
	order sasid, before(firstname)

	drop namesuffix

	* Search begin year = first four characters of searchdate; grad_year
	* takes the same value.
	tostring searchdate, replace
	generate searchbeginyear = substr(searchdate, 1, 4)
	destring searchbeginyear, replace
	generate grad_year = searchbeginyear
	drop searchdate

	rename twoyearfouryear __2year4year
	destring __2year4year, replace

	destring collegesequence, replace

	duplicates drop

	* All CIP / major fields are stored as strings.
	foreach fld of varlist *cip* *major* {
		capture tostring `fld', replace force
	}

	* String flags become labelled numerics that keep their original names.
	foreach flag in recordfound publicprivate graduated {
		encode `flag', gen(`flag'_1)
		drop `flag'
		rename `flag'_1 `flag'
	}

	* Date fields are parsed with the "YMD" mask into %td daily dates.
	foreach dt in enrollmentbegin enrollmentend graduationdate {
		tostring `dt', replace
		generate t`dt' = date(`dt', "YMD")
		format t`dt' %td
		drop `dt'
		rename t`dt' `dt'
	}

	* Provenance markers.
	generate indata = "2006_2008graduates"
	generate searchsubmit = 2017

	save "${save}nsc_MAgrads_2006_2008_clean_file.dta", replace

	* Final copy after another duplicates pass.
	duplicates drop
	label data "MA grads 2006-08"
	save "${save}nsc_MA_2006_2008graduates_clean.dta", replace
}

*************
*2003-2013 non-graduates file
*************
if `file2014nongrad' == 1 {

	* Standardise the 2003-2013 non-graduates NSC file and write it to ${save}.
	use "${nscdata}nsc_2003_2013_nongrad.dta", clear

	rename *, lower
	destring sasid, replace
	format sasid %12.0g

	* String flags become labelled numerics that keep their original names.
	foreach flag in recordfoundyn publicprivate graduated {
		encode `flag', gen(`flag'_1)
		drop `flag'
		rename `flag'_1 `flag'
	}

	* Date fields are parsed with the "YMD" mask into %td daily dates. There is
	* no tostring step here, so date() is applied to the fields as stored.
	foreach dt in enrollmentbegin enrollmentend graduationdate {
		generate `dt'1 = date(`dt', "YMD")
		format `dt'1 %td
		drop `dt'
		rename `dt'1 `dt'
	}

	rename _v1 __2year4year
	drop namesuf

	* grad_year = first four characters of searchdate; searchbeginyear takes
	* the same value.
	tostring searchdate, replace
	generate grad_year = substr(searchdate, 1, 4)
	destring grad_year, replace
	generate searchbeginyear = grad_year
	drop searchdate

	rename *, lower
	* Use the same flag name as the graduate files.
	rename recordfoundyn recordfound

	destring collegesequence, replace

	* All CIP / major fields are stored as strings.
	foreach fld of varlist *cip* *major* {
		capture tostring `fld', replace force
	}

	* Provenance markers.
	generate searchsubmit = 2014
	generate indata = "nongrad"

	duplicates drop
	label data "MA nongrads 2003-13" 
	save "${save}nsc_MA_2003_2013nongraduates_clean.dta", replace
}



*************
*2007-2014 graduates file (search submitted 2015)
*************
if `file2015grad' == 1 {
	
			use "${nscdata}nsc_2007_2014_grad", clear
			   
			drop SASID_BLANK 
			rename *, lower
			destring sasid, replace force
			format sasid %12.0g
				
			drop if sasid==.
			 
			foreach j in recordfound publicprivate graduated {
			
				encode `j', gen(`j'_1)
				drop `j'
				ren `j'_1 `j'
			}
			
			foreach i in enrollmentbegin ///
						 enrollmentend ///
						 graduationdate {
						
				gen `i'1 = date( `i', "YMD")
				format `i'1 %td
				drop `i'
				rename `i'1 `i'
			}
			  
			drop namesuffix  
			ren degree_major_* degreemajor*
			ren degree_cip_* degreecip*
			ren enroll_major_* enrollmajor*
			ren enroll_cip_* enrollcip*
			
			ren twoyearfouryear __2year4year
			
			destring collegeseq, replace
				foreach v of varlist *cip* *major*{
				cap tostring `v', replace force
				}		
			tostring searchdate, replace
			gen searchbeginyear = substr(searchdate, 1, 4)
			destring searchbeginyear, replace 
			drop searchdate
			
			gen indata = "2007_2014graduates"
			gen searchsubmit = 2015			  
			 		
			g degreecip = degreecip1
			replace degreecip = "4506" if regexm(upper(degreemajor1), "ECONOMICS") & degreecip1 == ""
			replace degreecip = "52" if regexm(upper(degreemajor1), "BUSINESS|FINANCE") & degreecip1 == ""
			replace degreecip = "52" if regexm(upper(degreemajor1), "OPERATIONS MANAG") & degreecip1 == ""
			replace degreecip = "5214" if regexm(upper(degreemajor1), "MARKETING") & degreecip1 == ""
			replace degreecip = "5213" if regexm(upper(degreemajor1), "MANAGEMENT") & degreecip1 == ""
			replace degreecip = "45" if regexm(upper(degreemajor1), "ANTHRO|ARCHEO|CRIMINO|DEMOGRA|POLITIC|SOCIOLO|HUMANI|GOVERN|GEOGRA") & degreecip1 == ""
			replace degreecip = "50" if regexm(upper(degreemajor1), "VISUAL|PERFORM|ART|DANCE|PHOTO|FILM|MUSIC|MEDIA|ENTERTAIN|DRAMA|CRAFT|THEATRE|THEATER") & degreecip1 == ""
			replace degreecip = "54" if regexm(upper(degreemajor1), "HISTORY") & degreecip1 == ""
			replace degreecip = "42" if regexm(upper(degreemajor1), "PSYCHOLOGY") & degreecip1 == ""
			replace degreecip = "38" if regexm(upper(degreemajor1), "PHILOSO") & degreecip1 == ""
			replace degreecip = "24" if regexm(upper(degreemajor1), "LIBERAL|GENERAL STUDIES") & degreecip1 == ""
			replace degreecip = "23" if regexm(upper(degreemajor1), "ENGLISH|ENGLISH LANGUAGE") & degreecip1 == ""
			replace degreecip = "23" if regexm(upper(degreemajor1), "ENGLISH LIERATURE|WRITING") & degreecip1 == ""
			replace degreecip = "22" if regexm(upper(degreemajor1), "LEGAL|LAW") & degreecip1 == ""
			replace degreecip = "16" if regexm(upper(degreemajor1), "FOREIGN LANGUAGE|LANGUAGES|SPAN|JAPAN|CHIN|COMPARATIVE LING|FRENCH") & degreecip1 == ""
			replace degreecip = "13" if regexm(upper(degreemajor1), "EDUCATION") & degreecip1 == ""
			replace degreecip = "09" if regexm(upper(degreemajor1), "COMMUNI") & degreecip1 == "" 
			replace degreecip = "05" if regexm(upper(degreemajor1), "ETHNIC|CULTURAL|GENDER|WOMEN") & degreecip1 == "" 
			replace degreecip = "14" if regexm(upper(degreemajor1), "ENGI") & degreecip1 == "" 
			replace degreecip = "11" if regexm(upper(degreemajor1), "COMPUT|INFORMATION TECH") & degreecip1 == "" 
			replace degreecip = "13" if regexm(upper(degreemajor1), "EDUC|CURRIC|EARLY CHI") & degreecip1 == ""
			replace degreecip = "13" if regexm(upper(degreemajor1), "SCHOOL COUN|STUDENT COUN") & degreecip1 == "" 
			replace degreecip = "25" if  regexm(upper(degreemajor1), "LIBRA") & degreecip1 == ""
			replace degreecip = "26" if  regexm(upper(degreemajor1), "BIOLOG|NEURO|BIOTECH") & degreecip1 == ""
			replace degreecip = "27" if  regexm(upper(degreemajor1), "MATH") & degreecip1 == ""
			replace degreecip = "25" if  regexm(upper(degreemajor1), "LIBRA") & degreecip1 == ""
			replace degreecip = "38" if  regexm(upper(degreemajor1), "RELIG") & degreecip1 == ""
			replace degreecip = "39" if  regexm(upper(degreemajor1), "THEOLOG") & degreecip1 == ""
			replace degreecip = "40" if  regexm(upper(degreemajor1), "ASTRON|ATMOS|GEOLOG|CHEMI|PHYSIC|METEO") & degreecip1 == ""
			replace degreecip = "44" if  regexm(upper(degreemajor1), "PUBLIC ADMIN|PUBLIC POL|SOCIAL WORK|COMMUNITY ORG|SOCIAL SERVICE|HUMAN SERV") & degreecip1 == ""
			replace degreecip = "26" if regexm(upper(degreemajor1), "BIOCHEM|BIOPHY|PHYSIO|BOTANY|ANATOMY|ZOO|GENETIC|GENOM|PHARMA|TOXICO|ECOLOG|EPIDEM|WILDLIFE") & degreecip1 == ""
			replace degreecip = "51" if regexm(upper(degreemajor1), "VETERINARY|DENT|HEALTH SCI") & degreecip1 == ""
			replace degreecip = "31" if regexm(upper(degreemajor1), "SPORT MANAGEMENT|SPORTS MANAGEMENT|KINESIO|EXER") & degreecip1 == ""
			replace degreecip = "09" if regexm(upper(degreemajor1), "PUBLIC RELA|ADVERTIS|RADIO|TELEV|JOURNA") & degreecip1 == "" 
			replace degreecip = "51" if regexm(upper(degreemajor1), "PUBLIC HEALTH|REHAB|PHYSICAL THER|PREMED|PRE-MED|PRE-VET|NUR|MEDIC|HEALTH CARE ADM") & degreecip1 == "" 
			replace degreecip = "01" if regexm(upper(degreemajor1), "PLANT|SOIL|AGRI|TURF|HORTIC|FOOD SCI|ANIMAL") & degreecip1 == "" 
			replace degreecip = "04" if regexm(upper(degreemajor1), "ARCHI") & degreecip1 == "" 	
			replace degreecip = "50" if regexm(upper(degreemajor1), "DESIGN") & degreecip1 == "" 
			replace degreecip = "52" if regexm(upper(degreemajor1), "HUMAN RESO|HOSPITALITY|HOTEL") & degreecip1 == "" 
			replace degreecip = "03" if regexm(upper(degreemajor1), "FOREST|ENVIRONMENTAL STUD|ENVIRONMENTAL SCI") & degreecip1 == "" 
			replace degreecip = "52" if regexm(upper(degreemajor1), "HUMAN RESO|HOSPITALITY|HOTEL|FASHION MERCH") & degreecip1 == "" 
			replace degreecip = "19" if regexm(upper(degreemajor1), "FOODS|NUTRITION") & degreecip1 == "" 
			replace degreecip = "43" if regexm(upper(degreemajor1), "CRIMINAL JUS|CRIME & JUST") & degreecip1 == "" 
			replace degreecip = "16" if regexm(upper(degreemajor1), "CLASSICS") & degreecip1 == "" 
			replace degreecip = "5203" if regexm(upper(degreemajor1), "ACCOUNTING") & degreecip1 == "" 
			replace degreecip = "47" if regexm(upper(degreemajor1), "AUTOMOTIVE TECH|AUTOMOB") & degreecip1 == ""	
			replace degreecip = "51" if regexm(upper(degreemajor1), "CLINICAL LAB") & degreecip1 == ""	
			replace degreecip = "19" if regexm(upper(degreemajor1), "HUMAN DEV") & degreecip1 == ""					
			 
			  
			duplicates drop  
			label data "MA 2007-2014 graduates, submitted 2015"
			save "${save}nsc_MA_2007_2014graduates_clean.dta", replace

}
*************
*2008-2016 graduates file
*************
if `file2016grad' == 1 {

	* Cleans the NSC pull stored in nsc_2008_2015_grad: numeric sasid, encoded
	* flags, daily dates, string CIP/major fields, an imputed degreecip, and
	* then saves nsc_MA_2007_2015graduates_clean.dta.
	use "${nscdata}nsc_2008_2015_grad", clear

	drop SASID_BLANK
	* a single lower-casing pass is sufficient for the whole block
	rename *, lower
	* force: ids that are not numeric become missing and are dropped below
	destring sasid, replace force
	format sasid %12.0g

	drop if sasid==.

	* string flags -> labelled numeric variables kept under the same names
	foreach j in recordfound publicprivate graduated {
		encode `j', gen(`j'_1)
		drop `j'
		ren `j'_1 `j'
	}

	* date strings are parsed as year-month-day into Stata daily dates
	foreach i in enrollmentbegin ///
				 enrollmentend ///
				 graduationdate {

		gen `i'1 = date( `i', "YMD")
		format `i'1 %td
		drop `i'
		rename `i'1 `i'
	}

	drop namesuffix
	ren degree_major_* degreemajor*
	ren degree_cip_* degreecip*
	ren enroll_major_* enrollmajor*
	ren enroll_cip_* enrollcip*

	ren twoyearfouryear __2year4year

	destring collegeseq, replace

	* search year = first four characters of searchdate
	tostring searchdate, replace
	gen searchbeginyear = substr(searchdate, 1, 4)
	destring searchbeginyear, replace
	drop searchdate

	* CIP and major fields are kept as strings
	foreach v of varlist *cip* *major*{
		cap tostring `v', replace force
	}
	gen indata = "2007_2015graduates"
	gen searchsubmit = 2016

	* ---- degreecip: reported CIP code, otherwise a code guessed from the title ----
	* Rules run top to bottom and a later match overwrites an earlier one, so
	* the order of the rules is part of the logic.
	*  - A reported CIP of "." counts as blank: tostring writes numeric missing
	*    as ".", so testing for the empty string alone skips those rows whenever
	*    the CIP column was numeric before the tostring loop.
	*  - DENT has to start a word (or follow PRE). As a bare substring it also
	*    hits STUDENT and INDEPENDENT, which turned the 13 assigned by the
	*    STUDENT COUN rule into 51.
	*  - Rules that a later rule with the same code always overwrote are listed
	*    once only (EDUCATION, LIBRA, HUMAN RESO|HOSPITALITY|HOTEL, ENGLISH).
	* NOTE(review): this list is repeated for the other NSC files in this
	* do-file; keep the copies in step so degreecip is comparable across pulls.
	* NOTE(review): other short stems still match inside longer words (ART in
	* EARTH, CHIN in MACHINE, RADIO in RADIOLOGY); left unchanged, please review.
	g degreecip = degreecip1
	tempvar cipfix_major cipfix_blank
	* the leading blank lets [^A-Z] match at the very start of the title
	gen `cipfix_major' = " " + upper(degreemajor1)
	gen byte `cipfix_blank' = inlist(degreecip1, "", ".")

	replace degreecip = "4506" if regexm(`cipfix_major', "ECONOMICS") & `cipfix_blank'
	replace degreecip = "52" if regexm(`cipfix_major', "BUSINESS|FINANCE") & `cipfix_blank'
	replace degreecip = "52" if regexm(`cipfix_major', "OPERATIONS MANAG") & `cipfix_blank'
	replace degreecip = "5214" if regexm(`cipfix_major', "MARKETING") & `cipfix_blank'
	replace degreecip = "5213" if regexm(`cipfix_major', "MANAGEMENT") & `cipfix_blank'
	replace degreecip = "45" if regexm(`cipfix_major', "ANTHRO|ARCHEO|CRIMINO|DEMOGRA|POLITIC|SOCIOLO|HUMANI|GOVERN|GEOGRA") & `cipfix_blank'
	replace degreecip = "50" if regexm(`cipfix_major', "VISUAL|PERFORM|ART|DANCE|PHOTO|FILM|MUSIC|MEDIA|ENTERTAIN|DRAMA|CRAFT|THEATRE|THEATER") & `cipfix_blank'
	replace degreecip = "54" if regexm(`cipfix_major', "HISTORY") & `cipfix_blank'
	replace degreecip = "42" if regexm(`cipfix_major', "PSYCHOLOGY") & `cipfix_blank'
	replace degreecip = "38" if regexm(`cipfix_major', "PHILOSO") & `cipfix_blank'
	replace degreecip = "24" if regexm(`cipfix_major', "LIBERAL|GENERAL STUDIES") & `cipfix_blank'
	replace degreecip = "23" if regexm(`cipfix_major', "ENGLISH|WRITING") & `cipfix_blank'
	replace degreecip = "22" if regexm(`cipfix_major', "LEGAL|LAW") & `cipfix_blank'
	replace degreecip = "16" if regexm(`cipfix_major', "FOREIGN LANGUAGE|LANGUAGES|SPAN|JAPAN|CHIN|COMPARATIVE LING|FRENCH") & `cipfix_blank'
	replace degreecip = "09" if regexm(`cipfix_major', "COMMUNI") & `cipfix_blank'
	replace degreecip = "05" if regexm(`cipfix_major', "ETHNIC|CULTURAL|GENDER|WOMEN") & `cipfix_blank'
	replace degreecip = "14" if regexm(`cipfix_major', "ENGI") & `cipfix_blank'
	replace degreecip = "11" if regexm(`cipfix_major', "COMPUT|INFORMATION TECH") & `cipfix_blank'
	replace degreecip = "13" if regexm(`cipfix_major', "EDUC|CURRIC|EARLY CHI") & `cipfix_blank'
	replace degreecip = "13" if regexm(`cipfix_major', "SCHOOL COUN|STUDENT COUN") & `cipfix_blank'
	replace degreecip = "26" if regexm(`cipfix_major', "BIOLOG|NEURO|BIOTECH") & `cipfix_blank'
	replace degreecip = "27" if regexm(`cipfix_major', "MATH") & `cipfix_blank'
	replace degreecip = "25" if regexm(`cipfix_major', "LIBRA") & `cipfix_blank'
	replace degreecip = "38" if regexm(`cipfix_major', "RELIG") & `cipfix_blank'
	replace degreecip = "39" if regexm(`cipfix_major', "THEOLOG") & `cipfix_blank'
	replace degreecip = "40" if regexm(`cipfix_major', "ASTRON|ATMOS|GEOLOG|CHEMI|PHYSIC|METEO") & `cipfix_blank'
	replace degreecip = "44" if regexm(`cipfix_major', "PUBLIC ADMIN|PUBLIC POL|SOCIAL WORK|COMMUNITY ORG|SOCIAL SERVICE|HUMAN SERV") & `cipfix_blank'
	replace degreecip = "26" if regexm(`cipfix_major', "BIOCHEM|BIOPHY|PHYSIO|BOTANY|ANATOMY|ZOO|GENETIC|GENOM|PHARMA|TOXICO|ECOLOG|EPIDEM|WILDLIFE") & `cipfix_blank'
	replace degreecip = "51" if regexm(`cipfix_major', "VETERINARY|[^A-Z]DENT|PREDENT|HEALTH SCI") & `cipfix_blank'
	replace degreecip = "31" if regexm(`cipfix_major', "SPORT MANAGEMENT|SPORTS MANAGEMENT|KINESIO|EXER") & `cipfix_blank'
	replace degreecip = "09" if regexm(`cipfix_major', "PUBLIC RELA|ADVERTIS|RADIO|TELEV|JOURNA") & `cipfix_blank'
	replace degreecip = "51" if regexm(`cipfix_major', "PUBLIC HEALTH|REHAB|PHYSICAL THER|PREMED|PRE-MED|PRE-VET|NUR|MEDIC|HEALTH CARE ADM") & `cipfix_blank'
	replace degreecip = "01" if regexm(`cipfix_major', "PLANT|SOIL|AGRI|TURF|HORTIC|FOOD SCI|ANIMAL") & `cipfix_blank'
	replace degreecip = "04" if regexm(`cipfix_major', "ARCHI") & `cipfix_blank'
	replace degreecip = "50" if regexm(`cipfix_major', "DESIGN") & `cipfix_blank'
	replace degreecip = "03" if regexm(`cipfix_major', "FOREST|ENVIRONMENTAL STUD|ENVIRONMENTAL SCI") & `cipfix_blank'
	replace degreecip = "52" if regexm(`cipfix_major', "HUMAN RESO|HOSPITALITY|HOTEL|FASHION MERCH") & `cipfix_blank'
	replace degreecip = "19" if regexm(`cipfix_major', "FOODS|NUTRITION") & `cipfix_blank'
	replace degreecip = "43" if regexm(`cipfix_major', "CRIMINAL JUS|CRIME & JUST") & `cipfix_blank'
	replace degreecip = "16" if regexm(`cipfix_major', "CLASSICS") & `cipfix_blank'
	replace degreecip = "5203" if regexm(`cipfix_major', "ACCOUNTING") & `cipfix_blank'
	replace degreecip = "47" if regexm(`cipfix_major', "AUTOMOTIVE TECH|AUTOMOB") & `cipfix_blank'
	replace degreecip = "51" if regexm(`cipfix_major', "CLINICAL LAB") & `cipfix_blank'
	replace degreecip = "19" if regexm(`cipfix_major', "HUMAN DEV") & `cipfix_blank'

	* helper columns must not reach the duplicate check or the saved file
	drop `cipfix_major' `cipfix_blank'

	duplicates drop
	label data "MA 2007-2015 graduates, submitted 2016"
	save "${save}nsc_MA_2007_2015graduates_clean.dta", replace

}

*************
*2006-2017 graduates file
*************
if `file2017grad' == 1 {

	* Cleans the NSC pull stored in nsc_2006_2016_grad and writes two files:
	* nsc_2016_clean_file.dta (before the final duplicate check) and
	* nsc_MA_2006_2016graduates_clean.dta (after it, compressed).
	use "${nscdata}nsc_2006_2016_grad", clear

	rename *, lower
	destring sasid, replace
	format sasid %12.0g

	order sasid, before(firstname)

	drop namesuffix

	* search year = first four characters of searchdate; grad_year copies it
	tostring searchdate, replace
	gen searchbeginyear = substr(searchdate, 1, 4)
	destring searchbeginyear, replace
	gen grad_year = searchbeginyear
	drop searchdate

	destring collegesequence, replace

	ren twoyearfouryear __2year4year

	duplicates drop
	* cap: the rename is skipped silently when recordfoundyn is absent
	cap ren recordfoundyn recordfound

	* string flags -> labelled numeric variables kept under the same names
	foreach j in recordfound publicprivate graduated {

		encode `j', gen(`j'_1)
		drop `j'
		ren `j'_1 `j'
	}

	* dates are forced to string, then parsed as year-month-day daily dates
	foreach i in enrollmentbegin ///
			 enrollmentend ///
			 graduationdate {

		tostring `i', replace
		gen t`i' = date(`i', "YMD")
		format t`i' %td
		drop `i'
		ren t`i' `i'
	}
	* CIP and major fields are kept as strings
	foreach v of varlist *cip* *major*{
		cap tostring `v', replace force
	}

	ren degree_major_* degreemajor*
	ren degree_cip_* degreecip*
	ren enroll_major_* enrollmajor*
	ren enroll_cip_* enrollcip*

	* ---- degreecip: reported CIP code, otherwise a code guessed from the title ----
	* Rules run top to bottom and a later match overwrites an earlier one, so
	* the order of the rules is part of the logic.
	*  - A reported CIP of "." counts as blank: tostring writes numeric missing
	*    as ".", so testing for the empty string alone skips those rows whenever
	*    the CIP column was numeric before the tostring loop.
	*  - DENT has to start a word (or follow PRE). As a bare substring it also
	*    hits STUDENT and INDEPENDENT, which turned the 13 assigned by the
	*    STUDENT COUN rule into 51.
	*  - Rules that a later rule with the same code always overwrote are listed
	*    once only (EDUCATION, LIBRA, HUMAN RESO|HOSPITALITY|HOTEL, ENGLISH).
	* NOTE(review): this list is repeated for the other NSC files in this
	* do-file; keep the copies in step so degreecip is comparable across pulls.
	* NOTE(review): other short stems still match inside longer words (ART in
	* EARTH, CHIN in MACHINE, RADIO in RADIOLOGY); left unchanged, please review.
	g degreecip = degreecip1
	tempvar cipfix_major cipfix_blank
	* the leading blank lets [^A-Z] match at the very start of the title
	gen `cipfix_major' = " " + upper(degreemajor1)
	gen byte `cipfix_blank' = inlist(degreecip1, "", ".")

	replace degreecip = "4506" if regexm(`cipfix_major', "ECONOMICS") & `cipfix_blank'
	replace degreecip = "52" if regexm(`cipfix_major', "BUSINESS|FINANCE") & `cipfix_blank'
	replace degreecip = "52" if regexm(`cipfix_major', "OPERATIONS MANAG") & `cipfix_blank'
	replace degreecip = "5214" if regexm(`cipfix_major', "MARKETING") & `cipfix_blank'
	replace degreecip = "5213" if regexm(`cipfix_major', "MANAGEMENT") & `cipfix_blank'
	replace degreecip = "45" if regexm(`cipfix_major', "ANTHRO|ARCHEO|CRIMINO|DEMOGRA|POLITIC|SOCIOLO|HUMANI|GOVERN|GEOGRA") & `cipfix_blank'
	replace degreecip = "50" if regexm(`cipfix_major', "VISUAL|PERFORM|ART|DANCE|PHOTO|FILM|MUSIC|MEDIA|ENTERTAIN|DRAMA|CRAFT|THEATRE|THEATER") & `cipfix_blank'
	replace degreecip = "54" if regexm(`cipfix_major', "HISTORY") & `cipfix_blank'
	replace degreecip = "42" if regexm(`cipfix_major', "PSYCHOLOGY") & `cipfix_blank'
	replace degreecip = "38" if regexm(`cipfix_major', "PHILOSO") & `cipfix_blank'
	replace degreecip = "24" if regexm(`cipfix_major', "LIBERAL|GENERAL STUDIES") & `cipfix_blank'
	replace degreecip = "23" if regexm(`cipfix_major', "ENGLISH|WRITING") & `cipfix_blank'
	replace degreecip = "22" if regexm(`cipfix_major', "LEGAL|LAW") & `cipfix_blank'
	replace degreecip = "16" if regexm(`cipfix_major', "FOREIGN LANGUAGE|LANGUAGES|SPAN|JAPAN|CHIN|COMPARATIVE LING|FRENCH") & `cipfix_blank'
	replace degreecip = "09" if regexm(`cipfix_major', "COMMUNI") & `cipfix_blank'
	replace degreecip = "05" if regexm(`cipfix_major', "ETHNIC|CULTURAL|GENDER|WOMEN") & `cipfix_blank'
	replace degreecip = "14" if regexm(`cipfix_major', "ENGI") & `cipfix_blank'
	replace degreecip = "11" if regexm(`cipfix_major', "COMPUT|INFORMATION TECH") & `cipfix_blank'
	replace degreecip = "13" if regexm(`cipfix_major', "EDUC|CURRIC|EARLY CHI") & `cipfix_blank'
	replace degreecip = "13" if regexm(`cipfix_major', "SCHOOL COUN|STUDENT COUN") & `cipfix_blank'
	replace degreecip = "26" if regexm(`cipfix_major', "BIOLOG|NEURO|BIOTECH") & `cipfix_blank'
	replace degreecip = "27" if regexm(`cipfix_major', "MATH") & `cipfix_blank'
	replace degreecip = "25" if regexm(`cipfix_major', "LIBRA") & `cipfix_blank'
	replace degreecip = "38" if regexm(`cipfix_major', "RELIG") & `cipfix_blank'
	replace degreecip = "39" if regexm(`cipfix_major', "THEOLOG") & `cipfix_blank'
	replace degreecip = "40" if regexm(`cipfix_major', "ASTRON|ATMOS|GEOLOG|CHEMI|PHYSIC|METEO") & `cipfix_blank'
	replace degreecip = "44" if regexm(`cipfix_major', "PUBLIC ADMIN|PUBLIC POL|SOCIAL WORK|COMMUNITY ORG|SOCIAL SERVICE|HUMAN SERV") & `cipfix_blank'
	replace degreecip = "26" if regexm(`cipfix_major', "BIOCHEM|BIOPHY|PHYSIO|BOTANY|ANATOMY|ZOO|GENETIC|GENOM|PHARMA|TOXICO|ECOLOG|EPIDEM|WILDLIFE") & `cipfix_blank'
	replace degreecip = "51" if regexm(`cipfix_major', "VETERINARY|[^A-Z]DENT|PREDENT|HEALTH SCI") & `cipfix_blank'
	replace degreecip = "31" if regexm(`cipfix_major', "SPORT MANAGEMENT|SPORTS MANAGEMENT|KINESIO|EXER") & `cipfix_blank'
	replace degreecip = "09" if regexm(`cipfix_major', "PUBLIC RELA|ADVERTIS|RADIO|TELEV|JOURNA") & `cipfix_blank'
	replace degreecip = "51" if regexm(`cipfix_major', "PUBLIC HEALTH|REHAB|PHYSICAL THER|PREMED|PRE-MED|PRE-VET|NUR|MEDIC|HEALTH CARE ADM") & `cipfix_blank'
	replace degreecip = "01" if regexm(`cipfix_major', "PLANT|SOIL|AGRI|TURF|HORTIC|FOOD SCI|ANIMAL") & `cipfix_blank'
	replace degreecip = "04" if regexm(`cipfix_major', "ARCHI") & `cipfix_blank'
	replace degreecip = "50" if regexm(`cipfix_major', "DESIGN") & `cipfix_blank'
	replace degreecip = "03" if regexm(`cipfix_major', "FOREST|ENVIRONMENTAL STUD|ENVIRONMENTAL SCI") & `cipfix_blank'
	replace degreecip = "52" if regexm(`cipfix_major', "HUMAN RESO|HOSPITALITY|HOTEL|FASHION MERCH") & `cipfix_blank'
	replace degreecip = "19" if regexm(`cipfix_major', "FOODS|NUTRITION") & `cipfix_blank'
	replace degreecip = "43" if regexm(`cipfix_major', "CRIMINAL JUS|CRIME & JUST") & `cipfix_blank'
	replace degreecip = "16" if regexm(`cipfix_major', "CLASSICS") & `cipfix_blank'
	replace degreecip = "5203" if regexm(`cipfix_major', "ACCOUNTING") & `cipfix_blank'
	replace degreecip = "47" if regexm(`cipfix_major', "AUTOMOTIVE TECH|AUTOMOB") & `cipfix_blank'
	replace degreecip = "51" if regexm(`cipfix_major', "CLINICAL LAB") & `cipfix_blank'
	replace degreecip = "19" if regexm(`cipfix_major', "HUMAN DEV") & `cipfix_blank'

	* helper columns must not reach either saved file
	drop `cipfix_major' `cipfix_blank'

	gen indata = "2016"
	gen searchsubmit = 2017

	* first copy is written before the final duplicate check and compress
	save "${save}nsc_2016_clean_file.dta", replace

	duplicates drop
	label data "MA grads and non grads up to 16,search in 2017"
	compress
	save "${save}nsc_MA_2006_2016graduates_clean.dta", replace
}

*************
*2009-2018 nongraduates file (nsc_2009_2018_nongrad; searchsubmit = 2019)
*************
if `file2018nongrad' == 1 {

	* Cleans the non-graduate NSC pull in nsc_2009_2018_nongrad and saves it,
	* compressed, as nsc_MA_2009_2018nongraduates_clean.dta.
	use "${nscdata}nsc_2009_2018_nongrad", clear

	* identifiers: lower-case names, numeric sasid placed ahead of firstname
	rename *, lower
	destring sasid, replace
	format sasid %12.0g
	order sasid, before(firstname)

	drop namesuffix

	* search year = first four characters of searchdate; grad_year copies it
	tostring searchdate, replace
	generate searchbeginyear = substr(searchdate, 1, 4)
	destring searchbeginyear, replace
	generate grad_year = searchbeginyear
	drop searchdate

	rename _v1 __2year4year

	duplicates drop
	* capture: nothing happens when recordfoundyn is not in the file
	capture rename recordfoundyn recordfound

	destring collegesequence, replace

	* string flags become labelled numeric variables with the same names
	foreach flag in recordfound publicprivate graduated {
		encode `flag', generate(`flag'_1)
		drop `flag'
		rename `flag'_1 `flag'
	}

	* dates: force to string, parse as year-month-day, keep the original name
	foreach dt in enrollmentbegin enrollmentend graduationdate {
		tostring `dt', replace
		generate t`dt' = date(`dt', "YMD")
		drop `dt'
		rename t`dt' `dt'
		format `dt' %td
	}

	* CIP and major fields are stored as strings
	foreach txt of varlist *cip* *major* {
		capture tostring `txt', replace force
	}

	* provenance tags
	generate indata = "2019"
	generate searchsubmit = 2019

	duplicates drop
	label data "MA non grads up to 18"
	compress
	save "${save}nsc_MA_2009_2018nongraduates_clean.dta", replace
}


*************
*2011-2019 graduates file
*************

if `file2019grad'==1 {

	* Cleans the NSC pull stored in nsc_2011_2018_grad: numeric sasid, encoded
	* flags, daily dates, string CIP/major fields, an imputed degreecip, and
	* then saves nsc_MA_2011_2018graduates_clean.dta.
	use "${nscdata}nsc_2011_2018_grad", clear
	rename *, lower
	cap drop sasid_blank
	destring sasid, replace

	drop if sasid==.
	format sasid %12.0g

	* string flags -> labelled numeric variables kept under the same names
	foreach j in recordfoundyn publicprivate graduated {
		encode `j', gen(`j'_1)
		drop `j'
		ren `j'_1 `j'
	}

	rename recordfoundyn recordfound

	* dates are forced to string, then parsed as year-month-day daily dates
	foreach i in enrollmentbegin enrollmentend graduationdate {
		tostring `i', replace
		gen `i'1 = date( `i', "YMD")
		format `i'1 %td
		drop `i'
		rename `i'1 `i'
	}

	drop  namesuffix

	ren _v1 __2year4year

	destring collegeseq, replace

	* search year = first four characters of searchdate
	tostring searchdate, replace
	gen searchbeginyear = substr(searchdate, 1, 4)
	destring searchbeginyear, replace
	drop searchdate

	* every string column that is entirely numeric becomes numeric here; the
	* CIP and major fields are then turned back into strings, which writes
	* their missing values as "."
	cap destring *, replace
	foreach v of varlist *cip* *major*{
		cap tostring `v', replace force
	}

	cap destring collegesequence, replace

	* ---- degreecip: reported CIP code, otherwise a code guessed from the title ----
	* Rules run top to bottom and a later match overwrites an earlier one, so
	* the order of the rules is part of the logic.
	*  - A reported CIP of "." counts as blank: tostring writes numeric missing
	*    as ".", so testing for the empty string alone skips those rows whenever
	*    the CIP column was numeric before the tostring loop.
	*  - DENT has to start a word (or follow PRE). As a bare substring it also
	*    hits STUDENT and INDEPENDENT, which turned the 13 assigned by the
	*    STUDENT COUN rule into 51.
	*  - Rules that a later rule with the same code always overwrote are listed
	*    once only (EDUCATION, LIBRA, HUMAN RESO|HOSPITALITY|HOTEL, ENGLISH).
	* NOTE(review): this list is repeated for the other NSC files in this
	* do-file; keep the copies in step so degreecip is comparable across pulls.
	* NOTE(review): other short stems still match inside longer words (ART in
	* EARTH, CHIN in MACHINE, RADIO in RADIOLOGY); left unchanged, please review.
	g degreecip = degreecip1
	tempvar cipfix_major cipfix_blank
	* the leading blank lets [^A-Z] match at the very start of the title
	gen `cipfix_major' = " " + upper(degreemajor1)
	gen byte `cipfix_blank' = inlist(degreecip1, "", ".")

	replace degreecip = "4506" if regexm(`cipfix_major', "ECONOMICS") & `cipfix_blank'
	replace degreecip = "52" if regexm(`cipfix_major', "BUSINESS|FINANCE") & `cipfix_blank'
	replace degreecip = "52" if regexm(`cipfix_major', "OPERATIONS MANAG") & `cipfix_blank'
	replace degreecip = "5214" if regexm(`cipfix_major', "MARKETING") & `cipfix_blank'
	replace degreecip = "5213" if regexm(`cipfix_major', "MANAGEMENT") & `cipfix_blank'
	replace degreecip = "45" if regexm(`cipfix_major', "ANTHRO|ARCHEO|CRIMINO|DEMOGRA|POLITIC|SOCIOLO|HUMANI|GOVERN|GEOGRA") & `cipfix_blank'
	replace degreecip = "50" if regexm(`cipfix_major', "VISUAL|PERFORM|ART|DANCE|PHOTO|FILM|MUSIC|MEDIA|ENTERTAIN|DRAMA|CRAFT|THEATRE|THEATER") & `cipfix_blank'
	replace degreecip = "54" if regexm(`cipfix_major', "HISTORY") & `cipfix_blank'
	replace degreecip = "42" if regexm(`cipfix_major', "PSYCHOLOGY") & `cipfix_blank'
	replace degreecip = "38" if regexm(`cipfix_major', "PHILOSO") & `cipfix_blank'
	replace degreecip = "24" if regexm(`cipfix_major', "LIBERAL|GENERAL STUDIES") & `cipfix_blank'
	replace degreecip = "23" if regexm(`cipfix_major', "ENGLISH|WRITING") & `cipfix_blank'
	replace degreecip = "22" if regexm(`cipfix_major', "LEGAL|LAW") & `cipfix_blank'
	replace degreecip = "16" if regexm(`cipfix_major', "FOREIGN LANGUAGE|LANGUAGES|SPAN|JAPAN|CHIN|COMPARATIVE LING|FRENCH") & `cipfix_blank'
	replace degreecip = "09" if regexm(`cipfix_major', "COMMUNI") & `cipfix_blank'
	replace degreecip = "05" if regexm(`cipfix_major', "ETHNIC|CULTURAL|GENDER|WOMEN") & `cipfix_blank'
	replace degreecip = "14" if regexm(`cipfix_major', "ENGI") & `cipfix_blank'
	replace degreecip = "11" if regexm(`cipfix_major', "COMPUT|INFORMATION TECH") & `cipfix_blank'
	replace degreecip = "13" if regexm(`cipfix_major', "EDUC|CURRIC|EARLY CHI") & `cipfix_blank'
	replace degreecip = "13" if regexm(`cipfix_major', "SCHOOL COUN|STUDENT COUN") & `cipfix_blank'
	replace degreecip = "26" if regexm(`cipfix_major', "BIOLOG|NEURO|BIOTECH") & `cipfix_blank'
	replace degreecip = "27" if regexm(`cipfix_major', "MATH") & `cipfix_blank'
	replace degreecip = "25" if regexm(`cipfix_major', "LIBRA") & `cipfix_blank'
	replace degreecip = "38" if regexm(`cipfix_major', "RELIG") & `cipfix_blank'
	replace degreecip = "39" if regexm(`cipfix_major', "THEOLOG") & `cipfix_blank'
	replace degreecip = "40" if regexm(`cipfix_major', "ASTRON|ATMOS|GEOLOG|CHEMI|PHYSIC|METEO") & `cipfix_blank'
	replace degreecip = "44" if regexm(`cipfix_major', "PUBLIC ADMIN|PUBLIC POL|SOCIAL WORK|COMMUNITY ORG|SOCIAL SERVICE|HUMAN SERV") & `cipfix_blank'
	replace degreecip = "26" if regexm(`cipfix_major', "BIOCHEM|BIOPHY|PHYSIO|BOTANY|ANATOMY|ZOO|GENETIC|GENOM|PHARMA|TOXICO|ECOLOG|EPIDEM|WILDLIFE") & `cipfix_blank'
	replace degreecip = "51" if regexm(`cipfix_major', "VETERINARY|[^A-Z]DENT|PREDENT|HEALTH SCI") & `cipfix_blank'
	replace degreecip = "31" if regexm(`cipfix_major', "SPORT MANAGEMENT|SPORTS MANAGEMENT|KINESIO|EXER") & `cipfix_blank'
	replace degreecip = "09" if regexm(`cipfix_major', "PUBLIC RELA|ADVERTIS|RADIO|TELEV|JOURNA") & `cipfix_blank'
	replace degreecip = "51" if regexm(`cipfix_major', "PUBLIC HEALTH|REHAB|PHYSICAL THER|PREMED|PRE-MED|PRE-VET|NUR|MEDIC|HEALTH CARE ADM") & `cipfix_blank'
	replace degreecip = "01" if regexm(`cipfix_major', "PLANT|SOIL|AGRI|TURF|HORTIC|FOOD SCI|ANIMAL") & `cipfix_blank'
	replace degreecip = "04" if regexm(`cipfix_major', "ARCHI") & `cipfix_blank'
	replace degreecip = "50" if regexm(`cipfix_major', "DESIGN") & `cipfix_blank'
	replace degreecip = "03" if regexm(`cipfix_major', "FOREST|ENVIRONMENTAL STUD|ENVIRONMENTAL SCI") & `cipfix_blank'
	replace degreecip = "52" if regexm(`cipfix_major', "HUMAN RESO|HOSPITALITY|HOTEL|FASHION MERCH") & `cipfix_blank'
	replace degreecip = "19" if regexm(`cipfix_major', "FOODS|NUTRITION") & `cipfix_blank'
	replace degreecip = "43" if regexm(`cipfix_major', "CRIMINAL JUS|CRIME & JUST") & `cipfix_blank'
	replace degreecip = "16" if regexm(`cipfix_major', "CLASSICS") & `cipfix_blank'
	replace degreecip = "5203" if regexm(`cipfix_major', "ACCOUNTING") & `cipfix_blank'
	replace degreecip = "47" if regexm(`cipfix_major', "AUTOMOTIVE TECH|AUTOMOB") & `cipfix_blank'
	replace degreecip = "51" if regexm(`cipfix_major', "CLINICAL LAB") & `cipfix_blank'
	replace degreecip = "19" if regexm(`cipfix_major', "HUMAN DEV") & `cipfix_blank'

	* helper columns must not reach the duplicate check or the saved file
	drop `cipfix_major' `cipfix_blank'

	gen indata = "2018graduates"
	gen searchsubmit = 2019

	duplicates drop
	label data "MA 2018 graduates, submitted 2019"

	save "${save}nsc_MA_2011_2018graduates_clean.dta", replace
}

*************
*2011-2019 nongraduates file
*************

if `file2019nongrad' == 1 {

	* Cleans the non-graduate NSC pull in nsc_2011_2018_nongrad and saves it
	* as nsc_MA_2011_2018nongraduates_clean.dta.
	use "${nscdata}nsc_2011_2018_nongrad", clear
	rename *, lower

	* identifiers: drop the blank-id column if present, keep numeric sasid only
	capture drop sasid_blank
	destring sasid, replace
	drop if sasid==.
	format sasid %12.0g

	* string flags become labelled numeric variables with the same names
	foreach flag in recordfoundyn publicprivate graduated {
		encode `flag', generate(`flag'_1)
		drop `flag'
		rename `flag'_1 `flag'
	}
	rename recordfoundyn recordfound

	* dates: force to string, parse as year-month-day, keep the original name
	foreach dt in enrollmentbegin enrollmentend graduationdate {
		tostring `dt', replace
		generate `dt'1 = date(`dt', "YMD")
		drop `dt'
		rename `dt'1 `dt'
		format `dt' %td
	}

	drop namesuffix

	* first string pass over the CIP and major fields; it is repeated after
	* the blanket destring further down, and both passes are kept as they were
	foreach txt of varlist *cip* *major* {
		capture tostring `txt', replace force
	}
	rename _v1 __2year4year

	destring collegeseq, replace

	* search year = first four characters of searchdate
	tostring searchdate, replace
	generate searchbeginyear = substr(searchdate, 1, 4)
	destring searchbeginyear, replace
	drop searchdate

	* numeric-looking string columns become numeric, then the CIP and major
	* fields go back to string
	capture destring *, replace
	foreach txt of varlist *cip* *major* {
		capture tostring `txt', replace force
	}

	* provenance tags
	generate indata = "2018nongraduates"
	generate searchsubmit = 2019

	duplicates drop
	label data "MA 2018 non-graduates, submitted 2019"

	save "${save}nsc_MA_2011_2018nongraduates_clean.dta", replace
}

*************
*2012-2020 graduates file
*************
if  `file2020grad' == 1 {

	* Cleans the NSC pull stored in nsc_2012_2019_grad.dta: numeric sasid,
	* encoded flags, daily dates, string CIP/major fields, an imputed
	* degreecip, and then saves nsc_MA_2012_2019graduates_clean.dta.
	use  "${nscdata}nsc_2012_2019_grad.dta", clear
	ren *  , lower
	drop youruniqueidentifier

	destring sasid, replace

	drop if sasid==.
	format sasid %12.0g

	* string flags -> labelled numeric variables kept under the same names
	foreach j in recordfoundyn publicprivate graduated {
		encode `j', gen(`j'_1)
		drop `j'
		ren `j'_1 `j'
	}
	ren recordfoundyn recordfound

	* dates are forced to string, then parsed as year-month-day daily dates
	foreach i in enrollmentbegin enrollmentend graduationdate {
		tostring `i', replace
		gen `i'1 = date( `i', "YMD")
		format `i'1 %td
		drop `i'
		rename `i'1 `i'
	}

	drop  namesuffix

	ren _v1 __2year4year

	destring collegeseq, replace

	* search year = first four characters of searchdate
	tostring searchdate, replace
	gen searchbeginyear = substr(searchdate, 1, 4)
	destring searchbeginyear, replace
	drop searchdate

	* every string column that is entirely numeric becomes numeric here; the
	* CIP and major fields are then turned back into strings, which writes
	* their missing values as "."
	cap destring *, replace
	foreach v of varlist  *cip* *major*{
		cap tostring `v', replace force
	}

	* ---- degreecip: reported CIP code, otherwise a code guessed from the title ----
	* Rules run top to bottom and a later match overwrites an earlier one, so
	* the order of the rules is part of the logic.
	*  - A reported CIP of "." counts as blank: tostring writes numeric missing
	*    as ".", so testing for the empty string alone skips those rows whenever
	*    the CIP column was numeric before the tostring loop.
	*  - DENT has to start a word (or follow PRE). As a bare substring it also
	*    hits STUDENT and INDEPENDENT, which turned the 13 assigned by the
	*    STUDENT COUN rule into 51.
	*  - Rules that a later rule with the same code always overwrote are listed
	*    once only (EDUCATION, LIBRA, HUMAN RESO|HOSPITALITY|HOTEL, ENGLISH).
	* NOTE(review): this list is repeated for the other NSC files in this
	* do-file; keep the copies in step so degreecip is comparable across pulls.
	* NOTE(review): other short stems still match inside longer words (ART in
	* EARTH, CHIN in MACHINE, RADIO in RADIOLOGY); left unchanged, please review.
	g degreecip = degreecip1
	tempvar cipfix_major cipfix_blank
	* the leading blank lets [^A-Z] match at the very start of the title
	gen `cipfix_major' = " " + upper(degreemajor1)
	gen byte `cipfix_blank' = inlist(degreecip1, "", ".")

	replace degreecip = "4506" if regexm(`cipfix_major', "ECONOMICS") & `cipfix_blank'
	replace degreecip = "52" if regexm(`cipfix_major', "BUSINESS|FINANCE") & `cipfix_blank'
	replace degreecip = "52" if regexm(`cipfix_major', "OPERATIONS MANAG") & `cipfix_blank'
	replace degreecip = "5214" if regexm(`cipfix_major', "MARKETING") & `cipfix_blank'
	replace degreecip = "5213" if regexm(`cipfix_major', "MANAGEMENT") & `cipfix_blank'
	replace degreecip = "45" if regexm(`cipfix_major', "ANTHRO|ARCHEO|CRIMINO|DEMOGRA|POLITIC|SOCIOLO|HUMANI|GOVERN|GEOGRA") & `cipfix_blank'
	replace degreecip = "50" if regexm(`cipfix_major', "VISUAL|PERFORM|ART|DANCE|PHOTO|FILM|MUSIC|MEDIA|ENTERTAIN|DRAMA|CRAFT|THEATRE|THEATER") & `cipfix_blank'
	replace degreecip = "54" if regexm(`cipfix_major', "HISTORY") & `cipfix_blank'
	replace degreecip = "42" if regexm(`cipfix_major', "PSYCHOLOGY") & `cipfix_blank'
	replace degreecip = "38" if regexm(`cipfix_major', "PHILOSO") & `cipfix_blank'
	replace degreecip = "24" if regexm(`cipfix_major', "LIBERAL|GENERAL STUDIES") & `cipfix_blank'
	replace degreecip = "23" if regexm(`cipfix_major', "ENGLISH|WRITING") & `cipfix_blank'
	replace degreecip = "22" if regexm(`cipfix_major', "LEGAL|LAW") & `cipfix_blank'
	replace degreecip = "16" if regexm(`cipfix_major', "FOREIGN LANGUAGE|LANGUAGES|SPAN|JAPAN|CHIN|COMPARATIVE LING|FRENCH") & `cipfix_blank'
	replace degreecip = "09" if regexm(`cipfix_major', "COMMUNI") & `cipfix_blank'
	replace degreecip = "05" if regexm(`cipfix_major', "ETHNIC|CULTURAL|GENDER|WOMEN") & `cipfix_blank'
	replace degreecip = "14" if regexm(`cipfix_major', "ENGI") & `cipfix_blank'
	replace degreecip = "11" if regexm(`cipfix_major', "COMPUT|INFORMATION TECH") & `cipfix_blank'
	replace degreecip = "13" if regexm(`cipfix_major', "EDUC|CURRIC|EARLY CHI") & `cipfix_blank'
	replace degreecip = "13" if regexm(`cipfix_major', "SCHOOL COUN|STUDENT COUN") & `cipfix_blank'
	replace degreecip = "26" if regexm(`cipfix_major', "BIOLOG|NEURO|BIOTECH") & `cipfix_blank'
	replace degreecip = "27" if regexm(`cipfix_major', "MATH") & `cipfix_blank'
	replace degreecip = "25" if regexm(`cipfix_major', "LIBRA") & `cipfix_blank'
	replace degreecip = "38" if regexm(`cipfix_major', "RELIG") & `cipfix_blank'
	replace degreecip = "39" if regexm(`cipfix_major', "THEOLOG") & `cipfix_blank'
	replace degreecip = "40" if regexm(`cipfix_major', "ASTRON|ATMOS|GEOLOG|CHEMI|PHYSIC|METEO") & `cipfix_blank'
	replace degreecip = "44" if regexm(`cipfix_major', "PUBLIC ADMIN|PUBLIC POL|SOCIAL WORK|COMMUNITY ORG|SOCIAL SERVICE|HUMAN SERV") & `cipfix_blank'
	replace degreecip = "26" if regexm(`cipfix_major', "BIOCHEM|BIOPHY|PHYSIO|BOTANY|ANATOMY|ZOO|GENETIC|GENOM|PHARMA|TOXICO|ECOLOG|EPIDEM|WILDLIFE") & `cipfix_blank'
	replace degreecip = "51" if regexm(`cipfix_major', "VETERINARY|[^A-Z]DENT|PREDENT|HEALTH SCI") & `cipfix_blank'
	replace degreecip = "31" if regexm(`cipfix_major', "SPORT MANAGEMENT|SPORTS MANAGEMENT|KINESIO|EXER") & `cipfix_blank'
	replace degreecip = "09" if regexm(`cipfix_major', "PUBLIC RELA|ADVERTIS|RADIO|TELEV|JOURNA") & `cipfix_blank'
	replace degreecip = "51" if regexm(`cipfix_major', "PUBLIC HEALTH|REHAB|PHYSICAL THER|PREMED|PRE-MED|PRE-VET|NUR|MEDIC|HEALTH CARE ADM") & `cipfix_blank'
	replace degreecip = "01" if regexm(`cipfix_major', "PLANT|SOIL|AGRI|TURF|HORTIC|FOOD SCI|ANIMAL") & `cipfix_blank'
	replace degreecip = "04" if regexm(`cipfix_major', "ARCHI") & `cipfix_blank'
	replace degreecip = "50" if regexm(`cipfix_major', "DESIGN") & `cipfix_blank'
	replace degreecip = "03" if regexm(`cipfix_major', "FOREST|ENVIRONMENTAL STUD|ENVIRONMENTAL SCI") & `cipfix_blank'
	replace degreecip = "52" if regexm(`cipfix_major', "HUMAN RESO|HOSPITALITY|HOTEL|FASHION MERCH") & `cipfix_blank'
	replace degreecip = "19" if regexm(`cipfix_major', "FOODS|NUTRITION") & `cipfix_blank'
	replace degreecip = "43" if regexm(`cipfix_major', "CRIMINAL JUS|CRIME & JUST") & `cipfix_blank'
	replace degreecip = "16" if regexm(`cipfix_major', "CLASSICS") & `cipfix_blank'
	replace degreecip = "5203" if regexm(`cipfix_major', "ACCOUNTING") & `cipfix_blank'
	replace degreecip = "47" if regexm(`cipfix_major', "AUTOMOTIVE TECH|AUTOMOB") & `cipfix_blank'
	replace degreecip = "51" if regexm(`cipfix_major', "CLINICAL LAB") & `cipfix_blank'
	replace degreecip = "19" if regexm(`cipfix_major', "HUMAN DEV") & `cipfix_blank'

	* helper columns must not reach the duplicate check or the saved file
	drop `cipfix_major' `cipfix_blank'

	gen indata = "2019graduates"
	gen searchsubmit = 2020

	duplicates drop
	label data "MA 2012-2019 graduates, submitted 2020"
	compress
	save "${save}nsc_MA_2012_2019graduates_clean.dta", replace

}

*************
*2013-2021 graduates file
*************
if  `file2021grad' == 1 {
	* Clean the NSC return for MA 2013-2020 graduates (search submitted 2021)
	* and save it as nsc_MA_2013_2020graduates_clean.dta.
	use  "${nscdata}nsc_2013_2020_grad.dta", clear
	ren *  , lower
	drop youruniqueidentifier

	* sasid has to be numeric before the missing-ID filter can run
	destring sasid, replace
	drop if sasid==.
	format sasid %12.0g

	* encode the string flag columns to labelled numerics, keeping their names
	foreach j in recordfoundyn publicprivate graduated {
		encode `j', gen(`j'_1)
		drop `j'
		ren `j'_1 `j'
	}
	ren recordfoundyn recordfound

	* parse the date columns (year-month-day order) into Stata daily dates
	foreach i in enrollmentbegin enrollmentend graduationdate {
		tostring `i', replace
		gen `i'1 = date( `i', "YMD")
		format `i'1 %td
		drop `i'
		rename `i'1 `i'
	}

	drop  namesuffix

	* the institution-level column arrives as _v1; the code L is set to missing
	ren _v1 __2year4year
	replace __2year4year = "" if __2year4year=="L"
	destring __2year4year, replace

	destring collegeseq, replace

	* keep only the year (first four characters) of the search date
	tostring searchdate, replace
	gen searchbeginyear = substr(searchdate, 1, 4)
	destring searchbeginyear, replace
	drop searchdate

	* destring everything that is fully numeric, then force the CIP, major and
	* class-level columns back to string
	* NOTE(review): this round trip may strip leading zeros from codes - confirm against the raw file
	cap destring *, replace
	foreach v of varlist  *cip* *major* classlev*  {
		cap tostring `v', replace force
		* tostring writes numeric missing as "."; blank it so the == "" guards below see it as missing
		cap replace `v' = "" if `v' == "."
	}

	* Impute a CIP code from the major text wherever degreecip1 is empty.
	* Every rule tests degreecip1 (not degreecip), so when several patterns
	* match, the LAST matching rule wins - do not reorder these lines.
	g degreecip = degreecip1
	replace degreecip = "4506" if regexm(upper(degreemajor1), "ECONOMICS") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "BUSINESS|FINANCE") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "OPERATIONS MANAG") & degreecip1 == ""
	replace degreecip = "5214" if regexm(upper(degreemajor1), "MARKETING") & degreecip1 == ""
	replace degreecip = "5213" if regexm(upper(degreemajor1), "MANAGEMENT") & degreecip1 == ""
	replace degreecip = "45" if regexm(upper(degreemajor1), "ANTHRO|ARCHEO|CRIMINO|DEMOGRA|POLITIC|SOCIOLO|HUMANI|GOVERN|GEOGRA") & degreecip1 == ""
	replace degreecip = "50" if regexm(upper(degreemajor1), "VISUAL|PERFORM|ART|DANCE|PHOTO|FILM|MUSIC|MEDIA|ENTERTAIN|DRAMA|CRAFT|THEATRE|THEATER") & degreecip1 == ""
	replace degreecip = "54" if regexm(upper(degreemajor1), "HISTORY") & degreecip1 == ""
	replace degreecip = "42" if regexm(upper(degreemajor1), "PSYCHOLOGY") & degreecip1 == ""
	replace degreecip = "38" if regexm(upper(degreemajor1), "PHILOSO") & degreecip1 == ""
	replace degreecip = "24" if regexm(upper(degreemajor1), "LIBERAL|GENERAL STUDIES") & degreecip1 == ""
	replace degreecip = "23" if regexm(upper(degreemajor1), "ENGLISH|ENGLISH LANGUAGE") & degreecip1 == ""
	replace degreecip = "23" if regexm(upper(degreemajor1), "ENGLISH LIERATURE|WRITING") & degreecip1 == ""
	replace degreecip = "22" if regexm(upper(degreemajor1), "LEGAL|LAW") & degreecip1 == ""
	replace degreecip = "16" if regexm(upper(degreemajor1), "FOREIGN LANGUAGE|LANGUAGES|SPAN|JAPAN|CHIN|COMPARATIVE LING|FRENCH") & degreecip1 == ""
	replace degreecip = "13" if regexm(upper(degreemajor1), "EDUCATION") & degreecip1 == ""
	replace degreecip = "09" if regexm(upper(degreemajor1), "COMMUNI") & degreecip1 == ""
	replace degreecip = "05" if regexm(upper(degreemajor1), "ETHNIC|CULTURAL|GENDER|WOMEN") & degreecip1 == ""
	replace degreecip = "14" if regexm(upper(degreemajor1), "ENGI") & degreecip1 == ""
	replace degreecip = "11" if regexm(upper(degreemajor1), "COMPUT|INFORMATION TECH") & degreecip1 == ""
	replace degreecip = "13" if regexm(upper(degreemajor1), "EDUC|CURRIC|EARLY CHI") & degreecip1 == ""
	replace degreecip = "13" if regexm(upper(degreemajor1), "SCHOOL COUN|STUDENT COUN") & degreecip1 == ""
	replace degreecip = "25" if  regexm(upper(degreemajor1), "LIBRA") & degreecip1 == ""
	replace degreecip = "26" if  regexm(upper(degreemajor1), "BIOLOG|NEURO|BIOTECH") & degreecip1 == ""
	replace degreecip = "27" if  regexm(upper(degreemajor1), "MATH") & degreecip1 == ""
	replace degreecip = "25" if  regexm(upper(degreemajor1), "LIBRA") & degreecip1 == ""
	replace degreecip = "38" if  regexm(upper(degreemajor1), "RELIG") & degreecip1 == ""
	replace degreecip = "39" if  regexm(upper(degreemajor1), "THEOLOG") & degreecip1 == ""
	replace degreecip = "40" if  regexm(upper(degreemajor1), "ASTRON|ATMOS|GEOLOG|CHEMI|PHYSIC|METEO") & degreecip1 == ""
	replace degreecip = "44" if  regexm(upper(degreemajor1), "PUBLIC ADMIN|PUBLIC POL|SOCIAL WORK|COMMUNITY ORG|SOCIAL SERVICE|HUMAN SERV") & degreecip1 == ""
	replace degreecip = "26" if regexm(upper(degreemajor1), "BIOCHEM|BIOPHY|PHYSIO|BOTANY|ANATOMY|ZOO|GENETIC|GENOM|PHARMA|TOXICO|ECOLOG|EPIDEM|WILDLIFE") & degreecip1 == ""
	replace degreecip = "51" if regexm(upper(degreemajor1), "VETERINARY|DENT|HEALTH SCI") & degreecip1 == ""
	replace degreecip = "31" if regexm(upper(degreemajor1), "SPORT MANAGEMENT|SPORTS MANAGEMENT|KINESIO|EXER") & degreecip1 == ""
	replace degreecip = "09" if regexm(upper(degreemajor1), "PUBLIC RELA|ADVERTIS|RADIO|TELEV|JOURNA") & degreecip1 == ""
	replace degreecip = "51" if regexm(upper(degreemajor1), "PUBLIC HEALTH|REHAB|PHYSICAL THER|PREMED|PRE-MED|PRE-VET|NUR|MEDIC|HEALTH CARE ADM") & degreecip1 == ""
	replace degreecip = "01" if regexm(upper(degreemajor1), "PLANT|SOIL|AGRI|TURF|HORTIC|FOOD SCI|ANIMAL") & degreecip1 == ""
	replace degreecip = "04" if regexm(upper(degreemajor1), "ARCHI") & degreecip1 == ""
	replace degreecip = "50" if regexm(upper(degreemajor1), "DESIGN") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "HUMAN RESO|HOSPITALITY|HOTEL") & degreecip1 == ""
	replace degreecip = "03" if regexm(upper(degreemajor1), "FOREST|ENVIRONMENTAL STUD|ENVIRONMENTAL SCI") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "HUMAN RESO|HOSPITALITY|HOTEL|FASHION MERCH") & degreecip1 == ""
	replace degreecip = "19" if regexm(upper(degreemajor1), "FOODS|NUTRITION") & degreecip1 == ""
	replace degreecip = "43" if regexm(upper(degreemajor1), "CRIMINAL JUS|CRIME & JUST") & degreecip1 == ""
	replace degreecip = "16" if regexm(upper(degreemajor1), "CLASSICS") & degreecip1 == ""
	replace degreecip = "5203" if regexm(upper(degreemajor1), "ACCOUNTING") & degreecip1 == ""
	replace degreecip = "47" if regexm(upper(degreemajor1), "AUTOMOTIVE TECH|AUTOMOB") & degreecip1 == ""
	replace degreecip = "51" if regexm(upper(degreemajor1), "CLINICAL LAB") & degreecip1 == ""
	replace degreecip = "19" if regexm(upper(degreemajor1), "HUMAN DEV") & degreecip1 == ""

	* provenance tags used after the extracts are appended
	gen indata = "2020graduates"
	gen searchsubmit = 2021

	duplicates drop
	label data "MA 2013-2020 graduates, submitted 2021"
	compress
	save "${save}nsc_MA_2013_2020graduates_clean.dta", replace

}

*************
*2014-2021 graduates file (search submitted 2022)
*************
if  `file2022grad' == 1 {
	* Clean the NSC return for MA 2014-2021 graduates (search submitted 2022)
	* and save it as nsc_MA_2014_2021graduates_clean.dta.
	use  "${nscdata}nsc_2014_2021_grad.dta", clear
	ren *  , lower
	drop youruniqueidentifier

	* sasid has to be numeric before the missing-ID filter can run
	destring sasid, replace
	drop if sasid==.
	format sasid %12.0g

	* encode the string flag columns to labelled numerics, keeping their names
	foreach j in recordfoundyn publicprivate graduated {
		encode `j', gen(`j'_1)
		drop `j'
		ren `j'_1 `j'
	}
	ren recordfoundyn recordfound

	* parse the date columns (year-month-day order) into Stata daily dates
	foreach i in enrollmentbegin enrollmentend graduationdate {
		tostring `i', replace
		gen `i'1 = date( `i', "YMD")
		format `i'1 %td
		drop `i'
		rename `i'1 `i'
	}

	drop  namesuffix

	* the institution-level column arrives as _v1; the code L is set to missing
	ren _v1 __2year4year
	replace __2year4year = "" if __2year4year=="L"
	destring __2year4year, replace

	destring collegeseq, replace

	* keep only the year (first four characters) of the search date
	tostring searchdate, replace
	gen searchbeginyear = substr(searchdate, 1, 4)
	destring searchbeginyear, replace
	drop searchdate

	* destring everything that is fully numeric, then force the CIP, major and
	* class-level columns back to string
	* NOTE(review): this round trip may strip leading zeros from codes - confirm against the raw file
	cap destring *, replace
	foreach v of varlist  *cip* *major* classlev*  {
		cap tostring `v', replace force
		* tostring writes numeric missing as "."; blank it so the == "" guards below see it as missing
		cap replace `v' = "" if `v' == "."
	}

	* provenance tags used after the extracts are appended
	gen indata = "2021graduates"
	gen searchsubmit = 2022

	* Impute a CIP code from the major text wherever degreecip1 is empty.
	* Every rule tests degreecip1 (not degreecip), so when several patterns
	* match, the LAST matching rule wins - do not reorder these lines.
	g degreecip = degreecip1
	replace degreecip = "4506" if regexm(upper(degreemajor1), "ECONOMICS") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "BUSINESS|FINANCE") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "OPERATIONS MANAG") & degreecip1 == ""
	replace degreecip = "5214" if regexm(upper(degreemajor1), "MARKETING") & degreecip1 == ""
	replace degreecip = "5213" if regexm(upper(degreemajor1), "MANAGEMENT") & degreecip1 == ""
	replace degreecip = "45" if regexm(upper(degreemajor1), "ANTHRO|ARCHEO|CRIMINO|DEMOGRA|POLITIC|SOCIOLO|HUMANI|GOVERN|GEOGRA") & degreecip1 == ""
	replace degreecip = "50" if regexm(upper(degreemajor1), "VISUAL|PERFORM|ART|DANCE|PHOTO|FILM|MUSIC|MEDIA|ENTERTAIN|DRAMA|CRAFT|THEATRE|THEATER") & degreecip1 == ""
	replace degreecip = "54" if regexm(upper(degreemajor1), "HISTORY") & degreecip1 == ""
	replace degreecip = "42" if regexm(upper(degreemajor1), "PSYCHOLOGY") & degreecip1 == ""
	replace degreecip = "38" if regexm(upper(degreemajor1), "PHILOSO") & degreecip1 == ""
	replace degreecip = "24" if regexm(upper(degreemajor1), "LIBERAL|GENERAL STUDIES") & degreecip1 == ""
	replace degreecip = "23" if regexm(upper(degreemajor1), "ENGLISH|ENGLISH LANGUAGE") & degreecip1 == ""
	replace degreecip = "23" if regexm(upper(degreemajor1), "ENGLISH LIERATURE|WRITING") & degreecip1 == ""
	replace degreecip = "22" if regexm(upper(degreemajor1), "LEGAL|LAW") & degreecip1 == ""
	replace degreecip = "16" if regexm(upper(degreemajor1), "FOREIGN LANGUAGE|LANGUAGES|SPAN|JAPAN|CHIN|COMPARATIVE LING|FRENCH") & degreecip1 == ""
	replace degreecip = "13" if regexm(upper(degreemajor1), "EDUCATION") & degreecip1 == ""
	replace degreecip = "09" if regexm(upper(degreemajor1), "COMMUNI") & degreecip1 == ""
	replace degreecip = "05" if regexm(upper(degreemajor1), "ETHNIC|CULTURAL|GENDER|WOMEN") & degreecip1 == ""
	replace degreecip = "14" if regexm(upper(degreemajor1), "ENGI") & degreecip1 == ""
	replace degreecip = "11" if regexm(upper(degreemajor1), "COMPUT|INFORMATION TECH") & degreecip1 == ""
	replace degreecip = "13" if regexm(upper(degreemajor1), "EDUC|CURRIC|EARLY CHI") & degreecip1 == ""
	replace degreecip = "13" if regexm(upper(degreemajor1), "SCHOOL COUN|STUDENT COUN") & degreecip1 == ""
	replace degreecip = "25" if  regexm(upper(degreemajor1), "LIBRA") & degreecip1 == ""
	replace degreecip = "26" if  regexm(upper(degreemajor1), "BIOLOG|NEURO|BIOTECH") & degreecip1 == ""
	replace degreecip = "27" if  regexm(upper(degreemajor1), "MATH") & degreecip1 == ""
	replace degreecip = "25" if  regexm(upper(degreemajor1), "LIBRA") & degreecip1 == ""
	replace degreecip = "38" if  regexm(upper(degreemajor1), "RELIG") & degreecip1 == ""
	replace degreecip = "39" if  regexm(upper(degreemajor1), "THEOLOG") & degreecip1 == ""
	replace degreecip = "40" if  regexm(upper(degreemajor1), "ASTRON|ATMOS|GEOLOG|CHEMI|PHYSIC|METEO") & degreecip1 == ""
	replace degreecip = "44" if  regexm(upper(degreemajor1), "PUBLIC ADMIN|PUBLIC POL|SOCIAL WORK|COMMUNITY ORG|SOCIAL SERVICE|HUMAN SERV") & degreecip1 == ""
	replace degreecip = "26" if regexm(upper(degreemajor1), "BIOCHEM|BIOPHY|PHYSIO|BOTANY|ANATOMY|ZOO|GENETIC|GENOM|PHARMA|TOXICO|ECOLOG|EPIDEM|WILDLIFE") & degreecip1 == ""
	replace degreecip = "51" if regexm(upper(degreemajor1), "VETERINARY|DENT|HEALTH SCI") & degreecip1 == ""
	replace degreecip = "31" if regexm(upper(degreemajor1), "SPORT MANAGEMENT|SPORTS MANAGEMENT|KINESIO|EXER") & degreecip1 == ""
	replace degreecip = "09" if regexm(upper(degreemajor1), "PUBLIC RELA|ADVERTIS|RADIO|TELEV|JOURNA") & degreecip1 == ""
	replace degreecip = "51" if regexm(upper(degreemajor1), "PUBLIC HEALTH|REHAB|PHYSICAL THER|PREMED|PRE-MED|PRE-VET|NUR|MEDIC|HEALTH CARE ADM") & degreecip1 == ""
	replace degreecip = "01" if regexm(upper(degreemajor1), "PLANT|SOIL|AGRI|TURF|HORTIC|FOOD SCI|ANIMAL") & degreecip1 == ""
	replace degreecip = "04" if regexm(upper(degreemajor1), "ARCHI") & degreecip1 == ""
	replace degreecip = "50" if regexm(upper(degreemajor1), "DESIGN") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "HUMAN RESO|HOSPITALITY|HOTEL") & degreecip1 == ""
	replace degreecip = "03" if regexm(upper(degreemajor1), "FOREST|ENVIRONMENTAL STUD|ENVIRONMENTAL SCI") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "HUMAN RESO|HOSPITALITY|HOTEL|FASHION MERCH") & degreecip1 == ""
	replace degreecip = "19" if regexm(upper(degreemajor1), "FOODS|NUTRITION") & degreecip1 == ""
	replace degreecip = "43" if regexm(upper(degreemajor1), "CRIMINAL JUS|CRIME & JUST") & degreecip1 == ""
	replace degreecip = "16" if regexm(upper(degreemajor1), "CLASSICS") & degreecip1 == ""
	replace degreecip = "5203" if regexm(upper(degreemajor1), "ACCOUNTING") & degreecip1 == ""
	replace degreecip = "47" if regexm(upper(degreemajor1), "AUTOMOTIVE TECH|AUTOMOB") & degreecip1 == ""
	replace degreecip = "51" if regexm(upper(degreemajor1), "CLINICAL LAB") & degreecip1 == ""
	replace degreecip = "19" if regexm(upper(degreemajor1), "HUMAN DEV") & degreecip1 == ""

	duplicates drop
	label data "MA 2014-2021 graduates, submitted 2022"
	compress
	save "${save}nsc_MA_2014_2021graduates_clean.dta", replace

}


*************
*2013-2021 non-graduates file
*************
if  `file2022nongrad' == 1 {
	* Clean the NSC return for MA 2013-2021 non-graduates (search submitted 2022)
	* and save it as nsc_MA_2013_2021nongraduates_clean.dta.
	use  "${nscdata}nsc_2013_2021_nongrad.dta", clear
	ren *  , lower
	drop youruniqueidentifier

	* sasid has to be numeric before the missing-ID filter can run
	destring sasid, replace
	drop if sasid==.
	format sasid %12.0g

	* encode the string flag columns to labelled numerics, keeping their names
	foreach j in recordfoundyn publicprivate graduated {
		encode `j', gen(`j'_1)
		drop `j'
		ren `j'_1 `j'
	}
	ren recordfoundyn recordfound

	* parse the date columns (year-month-day order) into Stata daily dates
	foreach i in enrollmentbegin enrollmentend graduationdate {
		tostring `i', replace
		gen `i'1 = date( `i', "YMD")
		format `i'1 %td
		drop `i'
		rename `i'1 `i'
	}

	drop  namesuffix

	* This extract spells the institution level out in words. Map the 2-year and
	* 4-year labels to the numeric codes the other sections end up with, and set
	* only the less-than-2-years label to missing (the other sections blank "L").
	* Previously all three labels were blanked, which discarded the information.
	ren _v1 __2year4year
	replace __2year4year = "2" if __2year4year=="2-year"
	replace __2year4year = "4" if __2year4year=="4-year"
	replace __2year4year = "" if __2year4year=="Less Than 2 Years"
	destring __2year4year, replace

	destring collegeseq, replace

	* keep only the year (first four characters) of the search date
	tostring searchdate, replace
	gen searchbeginyear = substr(searchdate, 1, 4)
	destring searchbeginyear, replace
	drop searchdate

	* destring everything that is fully numeric, then force the CIP, major and
	* degree-title columns back to string
	cap destring *, replace
	foreach v of varlist  *cip* *major* degreetitle {
		cap tostring `v', replace force
		* tostring writes numeric missing as "."; store it as an empty string instead
		cap replace `v' = "" if `v' == "."
	}

	* provenance tags used after the extracts are appended
	gen indata = "2021nongraduates"
	gen searchsubmit = 2022

	duplicates drop
	label data "MA 2013-2021 nongraduates, submitted 2022"
	compress
	save "${save}nsc_MA_2013_2021nongraduates_clean.dta", replace

}


*************
*2015-2022 graduates file, search submitted 2023 (only includes 2020-2023 enrollees)
*************
if  `file2023grad' == 1 {
	* Clean the NSC return for MA 2015-2022 graduates (search submitted 2023)
	* and save it as nsc_MA_2015_2022graduates_clean.dta.
	use  "${nscdata}nsc_2015_2022_grad.dta", clear
	ren *  , lower
	drop youruniqueidentifier

	* sasid has to be numeric before the missing-ID filter can run
	destring sasid, replace
	drop if sasid==.
	format sasid %12.0g

	* encode the string flag columns to labelled numerics, keeping their names
	foreach j in recordfoundyn publicprivate graduated {
		encode `j', gen(`j'_1)
		drop `j'
		ren `j'_1 `j'
	}
	ren recordfoundyn recordfound

	* parse the date columns (year-month-day order) into Stata daily dates
	foreach i in enrollmentbegin enrollmentend graduationdate {
		tostring `i', replace
		gen `i'1 = date( `i', "YMD")
		format `i'1 %td
		drop `i'
		rename `i'1 `i'
	}

	drop  namesuffix

	* the institution-level column already carries its final name in this file;
	* the code L is set to missing before conversion
	replace __2year4year = "" if __2year4year=="L"
	destring __2year4year, replace

	destring collegeseq, replace

	* keep only the year (first four characters) of the search date
	tostring searchdate, replace
	gen searchbeginyear = substr(searchdate, 1, 4)
	destring searchbeginyear, replace
	drop searchdate

	* destring everything that is fully numeric, then force the CIP, major and
	* class-level columns back to string
	* NOTE(review): this round trip may strip leading zeros from codes - confirm against the raw file
	cap destring *, replace
	foreach v of varlist  *cip* *major* classlev*  {
		cap tostring `v', replace force
		* tostring writes numeric missing as "."; blank it so the == "" guards below see it as missing
		cap replace `v' = "" if `v' == "."
	}

	* provenance tags used after the extracts are appended
	gen indata = "2022graduates"
	gen searchsubmit = 2023

	* Impute a CIP code from the major text wherever degreecip1 is empty.
	* Every rule tests degreecip1 (not degreecip), so when several patterns
	* match, the LAST matching rule wins - do not reorder these lines.
	g degreecip = degreecip1
	replace degreecip = "4506" if regexm(upper(degreemajor1), "ECONOMICS") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "BUSINESS|FINANCE") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "OPERATIONS MANAG") & degreecip1 == ""
	replace degreecip = "5214" if regexm(upper(degreemajor1), "MARKETING") & degreecip1 == ""
	replace degreecip = "5213" if regexm(upper(degreemajor1), "MANAGEMENT") & degreecip1 == ""
	replace degreecip = "45" if regexm(upper(degreemajor1), "ANTHRO|ARCHEO|CRIMINO|DEMOGRA|POLITIC|SOCIOLO|HUMANI|GOVERN|GEOGRA") & degreecip1 == ""
	replace degreecip = "50" if regexm(upper(degreemajor1), "VISUAL|PERFORM|ART|DANCE|PHOTO|FILM|MUSIC|MEDIA|ENTERTAIN|DRAMA|CRAFT|THEATRE|THEATER") & degreecip1 == ""
	replace degreecip = "54" if regexm(upper(degreemajor1), "HISTORY") & degreecip1 == ""
	replace degreecip = "42" if regexm(upper(degreemajor1), "PSYCHOLOGY") & degreecip1 == ""
	replace degreecip = "38" if regexm(upper(degreemajor1), "PHILOSO") & degreecip1 == ""
	replace degreecip = "24" if regexm(upper(degreemajor1), "LIBERAL|GENERAL STUDIES") & degreecip1 == ""
	replace degreecip = "23" if regexm(upper(degreemajor1), "ENGLISH|ENGLISH LANGUAGE") & degreecip1 == ""
	replace degreecip = "23" if regexm(upper(degreemajor1), "ENGLISH LIERATURE|WRITING") & degreecip1 == ""
	replace degreecip = "22" if regexm(upper(degreemajor1), "LEGAL|LAW") & degreecip1 == ""
	replace degreecip = "16" if regexm(upper(degreemajor1), "FOREIGN LANGUAGE|LANGUAGES|SPAN|JAPAN|CHIN|COMPARATIVE LING|FRENCH") & degreecip1 == ""
	replace degreecip = "13" if regexm(upper(degreemajor1), "EDUCATION") & degreecip1 == ""
	replace degreecip = "09" if regexm(upper(degreemajor1), "COMMUNI") & degreecip1 == ""
	replace degreecip = "05" if regexm(upper(degreemajor1), "ETHNIC|CULTURAL|GENDER|WOMEN") & degreecip1 == ""
	replace degreecip = "14" if regexm(upper(degreemajor1), "ENGI") & degreecip1 == ""
	replace degreecip = "11" if regexm(upper(degreemajor1), "COMPUT|INFORMATION TECH") & degreecip1 == ""
	replace degreecip = "13" if regexm(upper(degreemajor1), "EDUC|CURRIC|EARLY CHI") & degreecip1 == ""
	replace degreecip = "13" if regexm(upper(degreemajor1), "SCHOOL COUN|STUDENT COUN") & degreecip1 == ""
	replace degreecip = "25" if  regexm(upper(degreemajor1), "LIBRA") & degreecip1 == ""
	replace degreecip = "26" if  regexm(upper(degreemajor1), "BIOLOG|NEURO|BIOTECH") & degreecip1 == ""
	replace degreecip = "27" if  regexm(upper(degreemajor1), "MATH") & degreecip1 == ""
	replace degreecip = "25" if  regexm(upper(degreemajor1), "LIBRA") & degreecip1 == ""
	replace degreecip = "38" if  regexm(upper(degreemajor1), "RELIG") & degreecip1 == ""
	replace degreecip = "39" if  regexm(upper(degreemajor1), "THEOLOG") & degreecip1 == ""
	replace degreecip = "40" if  regexm(upper(degreemajor1), "ASTRON|ATMOS|GEOLOG|CHEMI|PHYSIC|METEO") & degreecip1 == ""
	replace degreecip = "44" if  regexm(upper(degreemajor1), "PUBLIC ADMIN|PUBLIC POL|SOCIAL WORK|COMMUNITY ORG|SOCIAL SERVICE|HUMAN SERV") & degreecip1 == ""
	replace degreecip = "26" if regexm(upper(degreemajor1), "BIOCHEM|BIOPHY|PHYSIO|BOTANY|ANATOMY|ZOO|GENETIC|GENOM|PHARMA|TOXICO|ECOLOG|EPIDEM|WILDLIFE") & degreecip1 == ""
	replace degreecip = "51" if regexm(upper(degreemajor1), "VETERINARY|DENT|HEALTH SCI") & degreecip1 == ""
	replace degreecip = "31" if regexm(upper(degreemajor1), "SPORT MANAGEMENT|SPORTS MANAGEMENT|KINESIO|EXER") & degreecip1 == ""
	replace degreecip = "09" if regexm(upper(degreemajor1), "PUBLIC RELA|ADVERTIS|RADIO|TELEV|JOURNA") & degreecip1 == ""
	replace degreecip = "51" if regexm(upper(degreemajor1), "PUBLIC HEALTH|REHAB|PHYSICAL THER|PREMED|PRE-MED|PRE-VET|NUR|MEDIC|HEALTH CARE ADM") & degreecip1 == ""
	replace degreecip = "01" if regexm(upper(degreemajor1), "PLANT|SOIL|AGRI|TURF|HORTIC|FOOD SCI|ANIMAL") & degreecip1 == ""
	replace degreecip = "04" if regexm(upper(degreemajor1), "ARCHI") & degreecip1 == ""
	replace degreecip = "50" if regexm(upper(degreemajor1), "DESIGN") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "HUMAN RESO|HOSPITALITY|HOTEL") & degreecip1 == ""
	replace degreecip = "03" if regexm(upper(degreemajor1), "FOREST|ENVIRONMENTAL STUD|ENVIRONMENTAL SCI") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "HUMAN RESO|HOSPITALITY|HOTEL|FASHION MERCH") & degreecip1 == ""
	replace degreecip = "19" if regexm(upper(degreemajor1), "FOODS|NUTRITION") & degreecip1 == ""
	replace degreecip = "43" if regexm(upper(degreemajor1), "CRIMINAL JUS|CRIME & JUST") & degreecip1 == ""
	replace degreecip = "16" if regexm(upper(degreemajor1), "CLASSICS") & degreecip1 == ""
	replace degreecip = "5203" if regexm(upper(degreemajor1), "ACCOUNTING") & degreecip1 == ""
	replace degreecip = "47" if regexm(upper(degreemajor1), "AUTOMOTIVE TECH|AUTOMOB") & degreecip1 == ""
	replace degreecip = "51" if regexm(upper(degreemajor1), "CLINICAL LAB") & degreecip1 == ""
	replace degreecip = "19" if regexm(upper(degreemajor1), "HUMAN DEV") & degreecip1 == ""

	duplicates drop
	label data "MA 2015-2022 graduates, submitted 2023"
	compress
	save "${save}nsc_MA_2015_2022graduates_clean.dta", replace

}

*************
*2016-2023 graduates file (only includes 2016-2024 enrollees)
*************
if  `file2024grad' == 1 {
	* Clean the NSC return for MA 2016-2023 graduates (search submitted 2024)
	* and save it as nsc_MA_2016_2023graduates_clean.dta.
	use  "${nscdata}nsc_2016_2023_grad.dta", clear
	ren *  , lower
	drop youruniqueidentifier

	* Same as the other sections: make sure sasid is numeric before comparing it
	* with numeric missing. destring leaves an already-numeric variable alone.
	destring sasid, replace
	drop if sasid==.
	format sasid %12.0g

	* encode the string flag columns to labelled numerics, keeping their names
	foreach j in recordfoundyn publicprivate graduated {
		encode `j', gen(`j'_1)
		drop `j'
		ren `j'_1 `j'
	}
	ren recordfoundyn recordfound

	* parse the date columns (year-month-day order) into Stata daily dates
	foreach i in enrollmentbegin enrollmentend graduationdate {
		tostring `i', replace
		gen `i'1 = date( `i', "YMD")
		format `i'1 %td
		drop `i'
		rename `i'1 `i'
	}

	drop  namesuffix

	* The institution-level column arrives as _v1. If it is stored as a string,
	* set the code L to missing first (as the other sections do) so destring can
	* convert it; the guard skips this when the column is already numeric.
	rename _v1  __2year4year
	capture confirm string variable __2year4year
	if _rc == 0 {
		replace __2year4year = "" if __2year4year=="L"
	}
	destring __2year4year, replace

	destring collegeseq, replace

	* keep only the year (first four characters) of the search date
	tostring searchdate, replace
	gen searchbeginyear = substr(searchdate, 1, 4)
	destring searchbeginyear, replace
	drop searchdate

	* destring only converts fully numeric columns; the CIP, major and class-level
	* columns are then forced back to string
	* NOTE(review): this round trip may strip leading zeros from codes - confirm against the raw file
	cap destring *, replace
	foreach v of varlist  *cip* *major* classlev*  {
		tostring `v', replace force
		* tostring writes numeric missing as "."; blank it so the == "" guards below see it as missing
		replace `v' = "" if `v' == "."
	}

	* provenance tags used after the extracts are appended
	gen indata = "2023graduates"
	gen searchsubmit = 2024

	* Impute a CIP code from the major text wherever degreecip1 is empty.
	* Every rule tests degreecip1 (not degreecip), so when several patterns
	* match, the LAST matching rule wins - do not reorder these lines.
	g degreecip = degreecip1
	replace degreecip = "4506" if regexm(upper(degreemajor1), "ECONOMICS") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "BUSINESS|FINANCE") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "OPERATIONS MANAG") & degreecip1 == ""
	replace degreecip = "5214" if regexm(upper(degreemajor1), "MARKETING") & degreecip1 == ""
	replace degreecip = "5213" if regexm(upper(degreemajor1), "MANAGEMENT") & degreecip1 == ""
	replace degreecip = "45" if regexm(upper(degreemajor1), "ANTHRO|ARCHEO|CRIMINO|DEMOGRA|POLITIC|SOCIOLO|HUMANI|GOVERN|GEOGRA") & degreecip1 == ""
	replace degreecip = "50" if regexm(upper(degreemajor1), "VISUAL|PERFORM|ART|DANCE|PHOTO|FILM|MUSIC|MEDIA|ENTERTAIN|DRAMA|CRAFT|THEATRE|THEATER") & degreecip1 == ""
	replace degreecip = "54" if regexm(upper(degreemajor1), "HISTORY") & degreecip1 == ""
	replace degreecip = "42" if regexm(upper(degreemajor1), "PSYCHOLOGY") & degreecip1 == ""
	replace degreecip = "38" if regexm(upper(degreemajor1), "PHILOSO") & degreecip1 == ""
	replace degreecip = "24" if regexm(upper(degreemajor1), "LIBERAL|GENERAL STUDIES") & degreecip1 == ""
	replace degreecip = "23" if regexm(upper(degreemajor1), "ENGLISH|ENGLISH LANGUAGE") & degreecip1 == ""
	replace degreecip = "23" if regexm(upper(degreemajor1), "ENGLISH LIERATURE|WRITING") & degreecip1 == ""
	replace degreecip = "22" if regexm(upper(degreemajor1), "LEGAL|LAW") & degreecip1 == ""
	replace degreecip = "16" if regexm(upper(degreemajor1), "FOREIGN LANGUAGE|LANGUAGES|SPAN|JAPAN|CHIN|COMPARATIVE LING|FRENCH") & degreecip1 == ""
	replace degreecip = "13" if regexm(upper(degreemajor1), "EDUCATION") & degreecip1 == ""
	replace degreecip = "09" if regexm(upper(degreemajor1), "COMMUNI") & degreecip1 == ""
	replace degreecip = "05" if regexm(upper(degreemajor1), "ETHNIC|CULTURAL|GENDER|WOMEN") & degreecip1 == ""
	replace degreecip = "14" if regexm(upper(degreemajor1), "ENGI") & degreecip1 == ""
	replace degreecip = "11" if regexm(upper(degreemajor1), "COMPUT|INFORMATION TECH") & degreecip1 == ""
	replace degreecip = "13" if regexm(upper(degreemajor1), "EDUC|CURRIC|EARLY CHI") & degreecip1 == ""
	replace degreecip = "13" if regexm(upper(degreemajor1), "SCHOOL COUN|STUDENT COUN") & degreecip1 == ""
	replace degreecip = "25" if  regexm(upper(degreemajor1), "LIBRA") & degreecip1 == ""
	replace degreecip = "26" if  regexm(upper(degreemajor1), "BIOLOG|NEURO|BIOTECH") & degreecip1 == ""
	replace degreecip = "27" if  regexm(upper(degreemajor1), "MATH") & degreecip1 == ""
	replace degreecip = "25" if  regexm(upper(degreemajor1), "LIBRA") & degreecip1 == ""
	replace degreecip = "38" if  regexm(upper(degreemajor1), "RELIG") & degreecip1 == ""
	replace degreecip = "39" if  regexm(upper(degreemajor1), "THEOLOG") & degreecip1 == ""
	replace degreecip = "40" if  regexm(upper(degreemajor1), "ASTRON|ATMOS|GEOLOG|CHEMI|PHYSIC|METEO") & degreecip1 == ""
	replace degreecip = "44" if  regexm(upper(degreemajor1), "PUBLIC ADMIN|PUBLIC POL|SOCIAL WORK|COMMUNITY ORG|SOCIAL SERVICE|HUMAN SERV") & degreecip1 == ""
	replace degreecip = "26" if regexm(upper(degreemajor1), "BIOCHEM|BIOPHY|PHYSIO|BOTANY|ANATOMY|ZOO|GENETIC|GENOM|PHARMA|TOXICO|ECOLOG|EPIDEM|WILDLIFE") & degreecip1 == ""
	replace degreecip = "51" if regexm(upper(degreemajor1), "VETERINARY|DENT|HEALTH SCI") & degreecip1 == ""
	replace degreecip = "31" if regexm(upper(degreemajor1), "SPORT MANAGEMENT|SPORTS MANAGEMENT|KINESIO|EXER") & degreecip1 == ""
	replace degreecip = "09" if regexm(upper(degreemajor1), "PUBLIC RELA|ADVERTIS|RADIO|TELEV|JOURNA") & degreecip1 == ""
	replace degreecip = "51" if regexm(upper(degreemajor1), "PUBLIC HEALTH|REHAB|PHYSICAL THER|PREMED|PRE-MED|PRE-VET|NUR|MEDIC|HEALTH CARE ADM") & degreecip1 == ""
	replace degreecip = "01" if regexm(upper(degreemajor1), "PLANT|SOIL|AGRI|TURF|HORTIC|FOOD SCI|ANIMAL") & degreecip1 == ""
	replace degreecip = "04" if regexm(upper(degreemajor1), "ARCHI") & degreecip1 == ""
	replace degreecip = "50" if regexm(upper(degreemajor1), "DESIGN") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "HUMAN RESO|HOSPITALITY|HOTEL") & degreecip1 == ""
	replace degreecip = "03" if regexm(upper(degreemajor1), "FOREST|ENVIRONMENTAL STUD|ENVIRONMENTAL SCI") & degreecip1 == ""
	replace degreecip = "52" if regexm(upper(degreemajor1), "HUMAN RESO|HOSPITALITY|HOTEL|FASHION MERCH") & degreecip1 == ""
	replace degreecip = "19" if regexm(upper(degreemajor1), "FOODS|NUTRITION") & degreecip1 == ""
	replace degreecip = "43" if regexm(upper(degreemajor1), "CRIMINAL JUS|CRIME & JUST") & degreecip1 == ""
	replace degreecip = "16" if regexm(upper(degreemajor1), "CLASSICS") & degreecip1 == ""
	replace degreecip = "5203" if regexm(upper(degreemajor1), "ACCOUNTING") & degreecip1 == ""
	replace degreecip = "47" if regexm(upper(degreemajor1), "AUTOMOTIVE TECH|AUTOMOB") & degreecip1 == ""
	replace degreecip = "51" if regexm(upper(degreemajor1), "CLINICAL LAB") & degreecip1 == ""
	replace degreecip = "19" if regexm(upper(degreemajor1), "HUMAN DEV") & degreecip1 == ""

	duplicates drop
	label data "MA 2016-2023 graduates, submitted 2024"
	compress
	save "${save}nsc_MA_2016_2023graduates_clean.dta", replace

}


*************
*Combine all data sets above
*************
if `combine' == 1 {

	* Stack the cleaned NSC extracts into one file. The 2003-2007 graduates
	* file is the master; the remaining extracts are appended in the order
	* listed in the loop below.
	*use "${save}nsc_charter_everyone.dta", clear
	*tostring degreetitle, replace
	use "${save}nsc_MA_2003_2007graduates_clean.dta", clear

	foreach extract in ///
		2007_2011graduates ///
		2007_2014nongraduates ///
		2007_2014graduates ///
		2007_2015graduates ///
		2006_2008graduates ///
		2006_2016graduates ///
		2011_2018graduates ///
		2011_2018nongraduates ///
		2003_2013nongraduates ///
		2012_2019graduates ///
		2014_2021graduates ///
		2015_2022graduates ///
		2009_2018nongraduates ///
		2013_2020graduates ///
		2013_2021nongraduates ///
		2016_2023graduates {
		append using "${save}nsc_MA_`extract'_clean.dta"
	}

	*duplicates drop // there aren't exact dups bc of searchsubmit dates, this takes a long time
	compress

	* Some extracts spell a few column names differently. Copy each alternate
	* spelling into the standard column where the standard one is empty, then
	* drop the alternates. Position k in one list pairs with position k in the other.
	local standard  classlevel  enrollmentmajor2 enrollmentmajor2 enrollmentmajor1 enrollmentcip2 enrollmentcip1
	local alternate class_level enrollmentmaor2  enrollmajor2     enrollmajor1     enrollcip2     enrollcip1
	forvalues k = 1/6 {
		local tgt : word `k' of `standard'
		local src : word `k' of `alternate'
		replace `tgt' = `src' if `tgt'==""
	}
	drop class_level enrollmajor2 enrollmentmaor2 enrollmajor1 enrollcip1  enrollcip2

	label data "appended NSC as of 2024 search"
	save "${data_clean}/NSC_allpended_July2024_withmajors.dta", replace

	* quick record counts by source extract and by search year
	tab indata
	tab searchsubmit

}


*************
*record cases sent to NSC for attrition
*************
if `sent' == 1 {

	* Build a one-row-per-student list of every non-missing sasid present in the
	* appended NSC file, flagged with sent_to_nsc = 1.
	use "${data_clean}/NSC_allpended_July2024_withmajors.dta" , clear

	* records without a student ID are discarded
	keep if !missing(sasid)
	keep sasid
	duplicates drop

	generate sent_to_nsc = 1
	save "${data_clean}/sent_to_nsc.dta", replace
}

*************
*initial clean
*************

if `clean' == 1 {


use "${data_clean}/NSC_allpended_July2024_withmajors.dta" , clear
// drop kids with missing ID
drop if missing(sasid)
// drop unneeded variables
drop firstname middleinitial lastname indata   classlevel

// rename and label variables
ren	collegecodebranch	ID_FSC
ren	collegename	college
ren	collegestate	college_state
ren	__2year4year	college_years
ren	enrollmentstatus	enrollment
ren	recordfound	match
ren	publicprivate	college_ownership
ren	graduated	graduated
ren	enrollmentbegin	term_start_date
ren	enrollmentend	term_end_date
ren	graduationdate	graduated_date
ren	searchsubmit	NSCdatayear
ren degreetitle 	degree_title

label var	ID_FSC	"Federal School Code"
label var	college	"college"
label var	college_state	"college state"
label var	college_years	"number of years in college program"
label var	enrollment	"student's enrollment time status (Full, Part, etc)"
label var	match	"enrollment record found in NSC"
label var	college_ownership	"college is public or private"
label var	graduated	"has the student graduated?"
label var	term_start_date	"start of term"
label var	term_end_date	"end of term"
label var	graduated_date	"graduation date"
label var	NSCdatayear	"year this record was received from NSC	"
label var 	degree_title "type of degree"

// keep only matched records
keep if (match == "Y":recordfound_1)
 replace degreemajor1="" if degreemajor1=="."
 
compress

// encode string variables
// college_state holds two-letter abbreviations.  Each abbreviation is swapped
// for a numeric code (kept as a string until the destring below); FO and MX
// receive the codes 100 and 101.  The list is read as consecutive
// <abbreviation> <code> pairs.  Continuation lines are indented with spaces
// so that no tab characters end up inside the macro.
local st_map ///
    AL 1 AK 2 AZ 4 AR 5 CA 6 CO 8 CT 9 DE 10 DC 11 FL 12 ///
    GA 13 HI 15 ID 16 IL 17 IN 18 IA 19 KS 20 KY 21 LA 22 ME 23 ///
    MD 24 MA 25 MI 26 MN 27 MS 28 MO 29 MT 30 NE 31 NV 32 NH 33 ///
    NJ 34 NM 35 NY 36 NC 37 ND 38 OH 39 OK 40 OR 41 PA 42 RI 44 ///
    SC 45 SD 46 TN 47 TX 48 UT 49 VT 50 VA 51 WA 53 WV 54 WI 55 ///
    WY 56 AS 60 FM 64 GU 66 MH 68 MP 69 PW 70 PR 72 UM 74 VI 78 ///
    FO 100 MX 101
while "`st_map'" != "" {
    gettoken st_abbr st_map : st_map
    gettoken st_code st_map : st_map
    replace college_state = "`st_code'" if college_state == "`st_abbr'"
}
destring college_state, replace

// destring only prints a note (it does not error) when non-numeric text is
// left over, which happens if a code is absent from the list above.  Stop
// here and show the offending values instead of failing later at
// -label values- with an unrelated message.
capture confirm numeric variable college_state
if _rc {
    display as error "college_state contains codes with no numeric mapping:"
    tabulate college_state if missing(real(college_state))
    exit 459
}

// value labels for the numeric codes (59 and 63 are labelled even though no
// abbreviation above maps to them)
label define states ///
    1 "Alabama" ///
    2 "Alaska" ///
    4 "Arizona" ///
    5 "Arkansas" ///
    6 "California" ///
    8 "Colorado" ///
    9 "Connecticut" ///
    10 "Delaware" ///
    11 "District of Columbia" ///
    12 "Florida" ///
    13 "Georgia" ///
    15 "Hawaii" ///
    16 "Idaho" ///
    17 "Illinois" ///
    18 "Indiana" ///
    19 "Iowa" ///
    20 "Kansas" ///
    21 "Kentucky" ///
    22 "Louisiana" ///
    23 "Maine" ///
    24 "Maryland" ///
    25 "Massachusetts" ///
    26 "Michigan" ///
    27 "Minnesota" ///
    28 "Mississippi" ///
    29 "Missouri" ///
    30 "Montana" ///
    31 "Nebraska" ///
    32 "Nevada" ///
    33 "New Hampshire" ///
    34 "New Jersey" ///
    35 "New Mexico" ///
    36 "New York" ///
    37 "North Carolina" ///
    38 "North Dakota" ///
    39 "Ohio" ///
    40 "Oklahoma" ///
    41 "Oregon" ///
    42 "Pennsylvania" ///
    44 "Rhode Island" ///
    45 "South Carolina" ///
    46 "South Dakota" ///
    47 "Tennessee" ///
    48 "Texas" ///
    49 "Utah" ///
    50 "Vermont" ///
    51 "Virginia" ///
    53 "Washington" ///
    54 "West Virginia" ///
    55 "Wisconsin" ///
    56 "Wyoming" ///
    59 "Bureau of Indian Education" ///
    60 "American Samoa" ///
    63 "Department of Defense Education Activity" ///
    64 "Federated States of Micronesia" ///
    66 "Guam" ///
    68 "Marshall Islands" ///
    69 "Northern Mariana Islands" ///
    70 "Palau" ///
    72 "Puerto Rico" ///
    74 "U.S. Minor Outlying Islands" ///
    78 "U.S. Virgin Islands" ///
    100 "foreign country" ///
    101 "Mexico" ///
    , add
label values college_state states

// TODO (carried over from the original note "fill in degreemajor a bit"):
// no degreemajor fill is performed here yet.

// intermediate file read back by the enrollment and degree sections below
save "${data_clean}/temp1.dta", replace
***************************************************
***************************************************

// NSC reports graduation dates as separate records from enrollment spells.  
// We divide these into two files for merging later with the baseline data.

*** FILE 1: ENROLLMENT ***

// Build the enrollment-spell file from the shared cleaned file.

// drop graduated kids
use "${data_clean}/temp1.dta", clear

keep if (graduated == "N":graduated_1)
drop graduated graduated_date match degree_title *major* *cip*

// format variables
// Recode the enrollment status strings to the numeric codes of label enrl:
//   value containing no letters   -> 9
//   X                             -> missing
//   A, W, D                       -> 0
//   L -> 1, Q -> 2, H -> 3, "Part Time" -> 4, F or "Full Time" -> 5
replace enrollment = "unknown" if !regexm(enrollment, "[a-zA-Z]")
replace enrollment = ""        if enrollment == "X"

replace enrollment = "0" if inlist(enrollment, "A", "W", "D")
replace enrollment = "1" if enrollment == "L"
replace enrollment = "2" if enrollment == "Q"
replace enrollment = "3" if enrollment == "H"
replace enrollment = "4" if enrollment == "Part Time"
replace enrollment = "5" if inlist(enrollment, "F", "Full Time")
replace enrollment = "9" if enrollment == "unknown"
destring enrollment, replace

// destring only prints a note (it does not error) when an unrecognised
// status string is left over.  Stop here and list the offending values
// instead of failing later at -label values- with an unrelated message.
capture confirm numeric variable enrollment
if _rc {
	display as error "enrollment contains status codes with no numeric mapping:"
	tabulate enrollment if missing(real(enrollment))
	exit 459
}

// NOTE(review): NSC documentation appears to define Q as three-quarter time,
// yet it is labelled "quarter time" and coded below half time (3).  Label and
// codes are left unchanged here; confirm before relying on the ordering.
label define enrl ///
	0 "not enrolled" ///
	1 "less than half time" ///
	2 "quarter time" ///
	3 "half time" ///
	4 "part time (general)" ///
	5 "full time" ///
	9 "n/a" ///
	, add
label values enrollment enrl

// drop duplicates
duplicates drop

// fill in missing college characteristic data
// A missing college_years is filled with the largest value reported on any
// record of the same college (grouping by ID_FSC was left disabled in the
// original).  Rows with a blank college are excluded from the grouping so
// that unrelated records are not pooled together and filled from each other.
bysort college: egen fillyrs = max(college_years) if !missing(college)
replace college_years = fillyrs if missing(college_years)
drop fillyrs

// save enrollment spells
compress
save "${data_clean}/NSC_enrollment.dta", replace

***************************************************
***************************************************

*** FILE 2: DEGREES ***

// Build the degree file from the shared cleaned file.
use "${data_clean}/temp1.dta", clear

// graduation records only; the term dates and enrollment status are not
// carried into this file
keep if graduated == "Y":graduated_1
drop term_start_date term_end_date enrollment graduated match

// every graduation record must carry a graduation date
assert !missing(graduated_date)

// remove exact duplicate rows
duplicates drop

// store the degrees ordered by student and graduation date
compress
sort sasid graduated_date
save "${data_clean}/NSC_degrees.dta", replace

***************************************************
***************************************************


}
